forked from OSchip/llvm-project
Add llvm.codegen intrinsic patch file to polly/utils.
The "llvm.codegen" intrinsic patch is a patch to LLVM, which is used to generate code for embedded LLVM-IR strings. In Polly, we use it to generate ptx assembly text for GPGPU code generation. llvm-svn: 157689
This commit is contained in:
parent
547ea51a73
commit
8d7c4dbf8c
|
@ -0,0 +1,492 @@
|
|||
From 28e525d58b236efa5db562c2931c996e26d6996b Mon Sep 17 00:00:00 2001
|
||||
From: Yabin Hu <yabin.hwu@gmail.com>
|
||||
Date: Wed, 23 May 2012 09:19:36 +0800
|
||||
Subject: [PATCH] Add llvm.codegen intrinsic.
|
||||
|
||||
The llvm.codegen intrinsic generates code for embedded LLVM-IR
|
||||
strings. Each call to the intrinsic is replaced by a pointer to
|
||||
the newly generated target code. The code generation target can be
|
||||
different to the one of the parent module.
|
||||
---
|
||||
docs/LangRef.html | 36 +++
|
||||
include/llvm/CodeGen/Passes.h | 3 +
|
||||
include/llvm/InitializePasses.h | 1 +
|
||||
include/llvm/Intrinsics.td | 4 +
|
||||
lib/CodeGen/CMakeLists.txt | 1 +
|
||||
lib/CodeGen/CodeGen.cpp | 1 +
|
||||
lib/CodeGen/CodeGenIntrinsic.cpp | 229 ++++++++++++++++++++
|
||||
lib/CodeGen/Passes.cpp | 3 +
|
||||
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +
|
||||
lib/Target/LLVMBuild.txt | 2 +-
|
||||
lib/VMCore/Verifier.cpp | 10 +
|
||||
.../CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll | 28 +++
|
||||
test/CodeGen/X86/EmbeddedCG/lit.local.cfg | 5 +
|
||||
13 files changed, 325 insertions(+), 1 deletions(-)
|
||||
create mode 100644 lib/CodeGen/CodeGenIntrinsic.cpp
|
||||
create mode 100644 test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
|
||||
create mode 100644 test/CodeGen/X86/EmbeddedCG/lit.local.cfg
|
||||
|
||||
diff --git a/docs/LangRef.html b/docs/LangRef.html
|
||||
index 8f7a17c..23d73bd 100644
|
||||
--- a/docs/LangRef.html
|
||||
+++ b/docs/LangRef.html
|
||||
@@ -242,6 +242,7 @@
|
||||
<li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
|
||||
<li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
|
||||
<li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
|
||||
+ <li><a href="#int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><a href="#int_libc">Standard C Library Intrinsics</a>
|
||||
@@ -7015,6 +7016,41 @@ LLVM</a>.</p>
|
||||
|
||||
</div>
|
||||
|
||||
+<!-- _______________________________________________________________________ -->
|
||||
+<h4>
|
||||
+ <a name="int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a>
|
||||
+</h4>
|
||||
+
|
||||
+<div>
|
||||
+
|
||||
+<h5>Syntax:</h5>
|
||||
+<pre>
|
||||
+ declare i8* @llvm.codegen(i8* <IRString>, i8* <MCPU>, i8* <
|
||||
+ Features>)
|
||||
+</pre>
|
||||
+
|
||||
+<h5>Overview:</h5>
|
||||
+<p>The '<tt>llvm.codegen</tt>' intrinsic uses the LLVM back ends to generate
|
||||
+ code for embedded LLVM-IR strings. The code generation target can be
|
||||
+ different to the one of the parent module.</p>
|
||||
+
|
||||
+<h5>Arguments:</h5>
|
||||
+<p><tt>IRString</tt> is a string containing LLVM-IR.</p>
|
||||
+<p><tt>MCPU</tt> is the name of the target CPU.</p>
|
||||
+<p><tt>Features</tt> is the string representation of the additional target
|
||||
+ features.</p>
|
||||
+
|
||||
+<h5>Semantics:</h5>
|
||||
+<p>The '<tt>llvm.codegen</tt>' intrinsic transforms a string containing LLVM IR
|
||||
+ to target assembly code. Calls to the intrinsic are replaced by a pointer to
|
||||
+ the newly generated target code. In case LLVM can not generate code (e.g. the
|
||||
+ target is not available), the call to the intrinsic is replaced by a i8 NULL
|
||||
+ pointer.Users of this intrinsic should make sure the target triple is
|
||||
+ properly set in the <IRString>. Inputs to both <MCPU> and
|
||||
+ <Features> parameters can be null pointers.</p>
|
||||
+
|
||||
+</div>
|
||||
+
|
||||
</div>
|
||||
|
||||
<!-- ======================================================================= -->
|
||||
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
|
||||
index e76fe99..ecf3e4b 100644
|
||||
--- a/include/llvm/CodeGen/Passes.h
|
||||
+++ b/include/llvm/CodeGen/Passes.h
|
||||
@@ -373,6 +373,9 @@ namespace llvm {
|
||||
/// branch folding).
|
||||
extern char &GCMachineCodeAnalysisID;
|
||||
|
||||
+ /// CodeGenIntrinsic Pass - Create target code for embedded LLVM-IR strings.
|
||||
+ FunctionPass *createCodeGenIntrinsicPass();
|
||||
+
|
||||
/// Deleter Pass - Releases GC metadata.
|
||||
///
|
||||
FunctionPass *createGCInfoDeleter();
|
||||
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
|
||||
index df696b1..28b146f 100644
|
||||
--- a/include/llvm/InitializePasses.h
|
||||
+++ b/include/llvm/InitializePasses.h
|
||||
@@ -91,6 +91,7 @@ void initializeCorrelatedValuePropagationPass(PassRegistry&);
|
||||
void initializeDAEPass(PassRegistry&);
|
||||
void initializeDAHPass(PassRegistry&);
|
||||
void initializeDCEPass(PassRegistry&);
|
||||
+void initializeCodeGenIntrinsicPass(PassRegistry&);
|
||||
void initializeDSEPass(PassRegistry&);
|
||||
void initializeDeadInstEliminationPass(PassRegistry&);
|
||||
void initializeDeadMachineInstructionElimPass(PassRegistry&);
|
||||
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
|
||||
index 75162bf..4bcec16 100644
|
||||
--- a/include/llvm/Intrinsics.td
|
||||
+++ b/include/llvm/Intrinsics.td
|
||||
@@ -226,6 +226,10 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
|
||||
// guard to the correct place on the stack frame.
|
||||
def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
|
||||
|
||||
+//===----------------- Code Generation for Embedded LLVM-IR ---------------===//
|
||||
+def int_codegen : Intrinsic<[llvm_ptr_ty],
|
||||
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty]>;
|
||||
+
|
||||
//===------------------- Standard C Library Intrinsics --------------------===//
|
||||
//
|
||||
|
||||
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
|
||||
index 855fa0c..4451922 100644
|
||||
--- a/lib/CodeGen/CMakeLists.txt
|
||||
+++ b/lib/CodeGen/CMakeLists.txt
|
||||
@@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen
|
||||
CalcSpillWeights.cpp
|
||||
CallingConvLower.cpp
|
||||
CodeGen.cpp
|
||||
+ CodeGenIntrinsic.cpp
|
||||
CodePlacementOpt.cpp
|
||||
CriticalAntiDepBreaker.cpp
|
||||
DeadMachineInstructionElim.cpp
|
||||
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
|
||||
index a81bb5c..662345a 100644
|
||||
--- a/lib/CodeGen/CodeGen.cpp
|
||||
+++ b/lib/CodeGen/CodeGen.cpp
|
||||
@@ -21,6 +21,7 @@ using namespace llvm;
|
||||
void llvm::initializeCodeGen(PassRegistry &Registry) {
|
||||
initializeBranchFolderPassPass(Registry);
|
||||
initializeCalculateSpillWeightsPass(Registry);
|
||||
+ initializeCodeGenIntrinsicPass(Registry);
|
||||
initializeCodePlacementOptPass(Registry);
|
||||
initializeDeadMachineInstructionElimPass(Registry);
|
||||
initializeExpandPostRAPass(Registry);
|
||||
diff --git a/lib/CodeGen/CodeGenIntrinsic.cpp b/lib/CodeGen/CodeGenIntrinsic.cpp
|
||||
new file mode 100644
|
||||
index 0000000..01253cd
|
||||
--- /dev/null
|
||||
+++ b/lib/CodeGen/CodeGenIntrinsic.cpp
|
||||
@@ -0,0 +1,229 @@
|
||||
+//===-- CodeGenIntrinsic.cpp - CodeGen Intrinsic --------------------------===//
|
||||
+//
|
||||
+// The LLVM Compiler Infrastructure
|
||||
+//
|
||||
+// This file is distributed under the University of Illinois Open Source
|
||||
+// License. See LICENSE.TXT for details.
|
||||
+//
|
||||
+//===----------------------------------------------------------------------===//
|
||||
+//
|
||||
+// This file implements the llvm.codegen intrinsic.
|
||||
+//
|
||||
+//===----------------------------------------------------------------------===//
|
||||
+
|
||||
+#include "llvm/CodeGen/Passes.h"
|
||||
+#include "llvm/CallingConv.h"
|
||||
+#include "llvm/IntrinsicInst.h"
|
||||
+#include "llvm/LLVMContext.h"
|
||||
+#include "llvm/Module.h"
|
||||
+#include "llvm/PassManager.h"
|
||||
+#include "llvm/Assembly/Parser.h"
|
||||
+#include "llvm/Target/TargetData.h"
|
||||
+#include "llvm/Target/TargetMachine.h"
|
||||
+#include "llvm/Target/TargetRegisterInfo.h"
|
||||
+#include "llvm/Support/Debug.h"
|
||||
+#include "llvm/Support/ErrorHandling.h"
|
||||
+#include "llvm/Support/FormattedStream.h"
|
||||
+#include "llvm/Support/Host.h"
|
||||
+#include "llvm/Support/IRBuilder.h"
|
||||
+#include "llvm/Support/raw_ostream.h"
|
||||
+#include "llvm/Support/SourceMgr.h"
|
||||
+#include "llvm/Support/TargetRegistry.h"
|
||||
+#include "llvm/ADT/Triple.h"
|
||||
+
|
||||
+using namespace llvm;
|
||||
+
|
||||
+namespace {
|
||||
+ /// ASMGenerator generates target-specific assembly code from LLVM IR.
|
||||
+ class ASMGenerator {
|
||||
+ public:
|
||||
+ ASMGenerator() {}
|
||||
+
|
||||
+ /// generate - Generates a target code string from a LLVM IR Value.
|
||||
+ bool generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
|
||||
+ std::string &ASM);
|
||||
+
|
||||
+ private:
|
||||
+ bool getStringFromConstantExpr(Value *ConstData, std::string &Out) const;
|
||||
+ };
|
||||
+
|
||||
+ /// CodeGenIntrinsic - This pass replaces each call to the llvm.codegen
|
||||
+ /// intrinsic with a string generated by ASMGenerator.
|
||||
+ class CodeGenIntrinsic : public FunctionPass {
|
||||
+ public:
|
||||
+ static char ID;
|
||||
+
|
||||
+ CodeGenIntrinsic();
|
||||
+ const char *getPassName() const;
|
||||
+ virtual bool runOnFunction(Function &F);
|
||||
+ };
|
||||
+}
|
||||
+
|
||||
+// -----------------------------------------------------------------------------
|
||||
+static bool getTargetMachineFromModule(Module *M, const StringRef &TripleStr,
|
||||
+ const StringRef &MCPU,
|
||||
+ const StringRef &Features,
|
||||
+ TargetMachine *&TM) {
|
||||
+ std::string ErrMsg;
|
||||
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
|
||||
+ if (!TheTarget) {
|
||||
+ errs() << ErrMsg << "\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ TargetOptions Options;
|
||||
+ TM = TheTarget->createTargetMachine(TripleStr, MCPU, Features, Options);
|
||||
+ assert(TM && "Could not allocate target machine!");
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool createASMAsString(Module *New, const StringRef &Triple,
|
||||
+ const StringRef &MCPU, const StringRef &Features,
|
||||
+ std::string &ASM) {
|
||||
+ TargetMachine *Target;
|
||||
+ if (!getTargetMachineFromModule(New, Triple, MCPU, Features, Target)) {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ // Build up all of the passes that we want to do to the module.
|
||||
+ PassManager PM;
|
||||
+
|
||||
+ // Add the target data from the target machine, if it exists, or the module.
|
||||
+ if (const TargetData *TD = Target->getTargetData())
|
||||
+ PM.add(new TargetData(*TD));
|
||||
+ else
|
||||
+ PM.add(new TargetData(New));
|
||||
+
|
||||
+ {
|
||||
+ raw_string_ostream NameROS(ASM);
|
||||
+ formatted_raw_ostream FOS(NameROS);
|
||||
+
|
||||
+ // Ask the target to add backend passes as necessary.
|
||||
+ int UseVerifier = true;
|
||||
+ if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
|
||||
+ UseVerifier)) {
|
||||
+ errs() << "CodeGen Intrinsic: target does not support generation of this "
|
||||
+ << "file type!\n";
|
||||
+
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ PM.run(*New);
|
||||
+ FOS.flush();
|
||||
+ }
|
||||
+
|
||||
+ delete Target;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+bool ASMGenerator::getStringFromConstantExpr(Value *ConstData,
|
||||
+ std::string &Out) const {
|
||||
+ bool Result = false;
|
||||
+ if (ConstantExpr *U = dyn_cast<ConstantExpr>(ConstData)) {
|
||||
+ Value *R = U->getOperand(0);
|
||||
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(R)) {
|
||||
+ Constant *C = GV->getInitializer();
|
||||
+ if (ConstantDataArray *CA = dyn_cast<ConstantDataArray>(C)) {
|
||||
+ Out = CA->getAsString();
|
||||
+ Result = true;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return Result;
|
||||
+}
|
||||
+
|
||||
+bool ASMGenerator::generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
|
||||
+ std::string &ASM) {
|
||||
+ std::string Kernel;
|
||||
+ if (!getStringFromConstantExpr(IRStr, Kernel))
|
||||
+ return false;
|
||||
+
|
||||
+ std::string MCPU;
|
||||
+ if (!getStringFromConstantExpr(MCPUStr, MCPU))
|
||||
+ MCPU = "";
|
||||
+
|
||||
+ std::string Features;
|
||||
+ if (!getStringFromConstantExpr(FeaturesStr, Features))
|
||||
+ Features = "";
|
||||
+
|
||||
+ SMDiagnostic ErrorMessage;
|
||||
+ LLVMContext Context;
|
||||
+ std::auto_ptr<Module> TempModule(
|
||||
+ ParseAssemblyString(Kernel.c_str(), 0, ErrorMessage, Context));
|
||||
+
|
||||
+ Triple TheTriple(TempModule->getTargetTriple());
|
||||
+ const std::string TripleStr = TheTriple.getTriple();
|
||||
+ if(TripleStr.empty()) {
|
||||
+ errs() << "error: Target triple isn't set correctly for the new module.\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ return createASMAsString(TempModule.get(), TripleStr.data(), MCPU.data(),
|
||||
+ Features.data(), ASM);
|
||||
+}
|
||||
+
|
||||
+// -----------------------------------------------------------------------------
|
||||
+INITIALIZE_PASS(CodeGenIntrinsic, "codegen-intrinsic", "CodeGen Intrinsic",
|
||||
+ false, false)
|
||||
+
|
||||
+FunctionPass *llvm::createCodeGenIntrinsicPass() {
|
||||
+ return new CodeGenIntrinsic();
|
||||
+}
|
||||
+
|
||||
+char CodeGenIntrinsic::ID = 0;
|
||||
+
|
||||
+CodeGenIntrinsic::CodeGenIntrinsic()
|
||||
+ : FunctionPass(ID) {
|
||||
+}
|
||||
+
|
||||
+const char *CodeGenIntrinsic::getPassName() const {
|
||||
+ return "Lowering CodeGen Intrinsic.";
|
||||
+}
|
||||
+
|
||||
+bool CodeGenIntrinsic::runOnFunction(Function &F) {
|
||||
+ bool MadeChange = false;
|
||||
+ Module *M = F.getParent();
|
||||
+ if (Function *CG = M->getFunction("llvm.codegen")) {
|
||||
+ for (Function::use_iterator I = CG->use_begin(), E = CG->use_end();
|
||||
+ I != E; ++I) {
|
||||
+ if (CallInst *CI = dyn_cast<CallInst>(*I)) {
|
||||
+ if (&F != CI->getParent()->getParent())
|
||||
+ continue;
|
||||
+
|
||||
+ std::string ASM;
|
||||
+ ASMGenerator *Generator = new ASMGenerator();
|
||||
+ IRBuilder<> Builder(CI->getParent(), CI);
|
||||
+ Value *St;
|
||||
+ if (!Generator->generate(CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
+ CI->getArgOperand(2), ASM)) {
|
||||
+ Type *Ty= CG->getReturnType();
|
||||
+ St = Constant::getNullValue(Ty);
|
||||
+ } else {
|
||||
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims?
|
||||
+ // Seems that, short of multithreaded LLVM, it should be safe;
|
||||
+ // all that is necessary is that a simple Module::iterator loop
|
||||
+ // not be invalidated. Appending to the GlobalVariable list is
|
||||
+ // safe in that sense.
|
||||
+ //
|
||||
+ // All the output passes emit globals last. The ExecutionEngine
|
||||
+ // explicitly supports adding globals to the module after
|
||||
+ // initialization.
|
||||
+ //
|
||||
+ // Still, if it isn't deemed acceptable, then this
|
||||
+ // transformation needs to be a ModulePass (which means it
|
||||
+ // cannot be in the 'llc' pipeline (which uses a
|
||||
+ // FunctionPassManager (which segfaults (not asserts) if
|
||||
+ // provided a ModulePass))).
|
||||
+ St = Builder.CreateGlobalStringPtr(ASM, "ASM");
|
||||
+ }
|
||||
+ CI->replaceAllUsesWith(St);
|
||||
+ CI->eraseFromParent();
|
||||
+ // We should erase the unused globals from current module. But we
|
||||
+ // can't do this within a FunctionPass.
|
||||
+ MadeChange = true;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return MadeChange;
|
||||
+}
|
||||
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
|
||||
index 490547b..95b6e0c 100644
|
||||
--- a/lib/CodeGen/Passes.cpp
|
||||
+++ b/lib/CodeGen/Passes.cpp
|
||||
@@ -305,6 +305,9 @@ void TargetPassConfig::addIRPasses() {
|
||||
|
||||
PM->add(createGCLoweringPass());
|
||||
|
||||
+ // Generate target code for embedded LLVM-IR strings.
|
||||
+ PM->add(createCodeGenIntrinsicPass());
|
||||
+
|
||||
// Make sure that no unreachable blocks are instruction selected.
|
||||
PM->add(createUnreachableBlockEliminationPass());
|
||||
}
|
||||
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
|
||||
index f1b4d80..d87986c 100644
|
||||
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
|
||||
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
|
||||
@@ -5131,6 +5131,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||
case Intrinsic::lifetime_end:
|
||||
// Discard region information.
|
||||
return 0;
|
||||
+
|
||||
+ case Intrinsic::codegen:
|
||||
+ llvm_unreachable("failed to lower codegen intrinsic!");
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
|
||||
index 045ab9e..d9bd19f 100644
|
||||
--- a/lib/Target/LLVMBuild.txt
|
||||
+++ b/lib/Target/LLVMBuild.txt
|
||||
@@ -45,7 +45,7 @@ parent = Libraries
|
||||
type = Library
|
||||
name = Target
|
||||
parent = Libraries
|
||||
-required_libraries = Core MC Support
|
||||
+required_libraries = Core MC Support AsmParser
|
||||
|
||||
; This is a special group whose required libraries are extended (by llvm-build)
|
||||
; with every built target, which makes it easy for tools to include every
|
||||
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
|
||||
index f11efff..1031685 100644
|
||||
--- a/lib/VMCore/Verifier.cpp
|
||||
+++ b/lib/VMCore/Verifier.cpp
|
||||
@@ -1783,6 +1783,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
|
||||
Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
|
||||
"llvm.invariant.end parameter #2 must be a constant integer", &CI);
|
||||
break;
|
||||
+ case Intrinsic::codegen:
|
||||
+ Assert1(isa<ConstantExpr>(CI.getArgOperand(0)),
|
||||
+ "llvm.codegen parameter #1 must be a constant expression", &CI);
|
||||
+ Assert1(isa<ConstantExpr>(CI.getArgOperand(1)) ||
|
||||
+ isa<ConstantPointerNull>(CI.getArgOperand(1)),
|
||||
+ "llvm.codegen parameter #2 must be a constant expression", &CI);
|
||||
+ Assert1(isa<ConstantExpr>(CI.getArgOperand(2)) ||
|
||||
+ isa<ConstantPointerNull>(CI.getArgOperand(2)),
|
||||
+ "llvm.codegen parameter #3 must be a constant expression", &CI);
|
||||
+ break;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
|
||||
new file mode 100644
|
||||
index 0000000..768790d
|
||||
--- /dev/null
|
||||
+++ b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
|
||||
@@ -0,0 +1,28 @@
|
||||
+; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
+
|
||||
+; ModuleID = 'embedded-codegen-ptx.ll'
|
||||
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
|
||||
+target triple = "i386-pc-linux-gnu"
|
||||
+
|
||||
+@llvm_kernel = private unnamed_addr constant [1937 x i8] c"target triple = \22ptx32-pc-linux-gnu\22\0A\0Adefine internal ptx_kernel void @gpu_codegen.ptx_subfn(i8* %ptx.Array) {\0Aptx.setup:\0A %0 = bitcast i8* %ptx.Array to [128 x [128 x i32]]*\0A %1 = call i32 @llvm.ptx.read.nctaid.x()\0A %2 = zext i32 %1 to i64\0A %3 = call i32 @llvm.ptx.read.nctaid.y()\0A %4 = zext i32 %3 to i64\0A %5 = call i32 @llvm.ptx.read.ntid.x()\0A %6 = zext i32 %5 to i64\0A %7 = call i32 @llvm.ptx.read.ntid.y()\0A %8 = zext i32 %7 to i64\0A %9 = call i32 @llvm.ptx.read.ctaid.x()\0A %10 = zext i32 %9 to i64\0A %11 = call i32 @llvm.ptx.read.ctaid.y()\0A %12 = zext i32 %11 to i64\0A %13 = call i32 @llvm.ptx.read.tid.x()\0A %14 = zext i32 %13 to i64\0A %15 = call i32 @llvm.ptx.read.tid.y()\0A %16 = zext i32 %15 to i64\0A br label %ptx.loop_body\0A\0Aptx.exit: ; preds = %polly.stmt.for.body3\0A ret void\0A\0Aptx.loop_body: ; preds = %ptx.setup\0A %p_gpu_index_i = mul i64 %12, %2\0A %17 = add i64 %p_gpu_index_i, %10\0A %p_gpu_index_j = mul i64 %16, %6\0A %18 = add i64 %p_gpu_index_j, %14\0A br label %polly.stmt.for.body3\0A\0Apolly.stmt.for.body3: ; preds = %ptx.loop_body\0A %19 = trunc i64 %17 to i32\0A %p_mul = shl nsw i32 %19, 7\0A %20 = trunc i64 %18 to i32\0A %p_add = add nsw i32 %p_mul, %20\0A %21 = trunc i64 %17 to i32\0A %22 = trunc i64 %18 to i32\0A %p_arrayidx4 = getelementptr inbounds [128 x [128 x i32]]* %0, i32 0, i32 %21, i32 %22\0A store i32 %p_add, i32* %p_arrayidx4\0A br label %ptx.exit\0A}\0A\0Adeclare i32 @llvm.ptx.read.nctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.nctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.y() nounwind readnone\0A\00"
|
||||
+
|
||||
+@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1
|
||||
+
|
||||
+define i32 @gpu_codegen() nounwind {
|
||||
+entry:
|
||||
+ %0 = call i8* @llvm.codegen(i8* getelementptr inbounds ([1937 x i8]* @llvm_kernel, i32 0, i32 0), i8* null, i8* null)
|
||||
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0)
|
||||
+ ret i32 0
|
||||
+}
|
||||
+
|
||||
+define i32 @main() nounwind {
|
||||
+entry:
|
||||
+ %call = call i32 @gpu_codegen()
|
||||
+ ret i32 0
|
||||
+}
|
||||
+
|
||||
+declare i8* @llvm.codegen(i8*, i8*, i8*) nounwind
|
||||
+
|
||||
+declare i32 @printf(i8*, ...) nounwind
|
||||
+
|
||||
+; CHECK: .entry gpu_codegen_2E_ptx_subfn (.param .b32 __param_1)
|
||||
diff --git a/test/CodeGen/X86/EmbeddedCG/lit.local.cfg b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
|
||||
new file mode 100644
|
||||
index 0000000..346ffa1
|
||||
--- /dev/null
|
||||
+++ b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
|
||||
@@ -0,0 +1,5 @@
|
||||
+config.suffixes = ['.ll', '.c', '.cpp']
|
||||
+
|
||||
+targets = set(config.root.targets_to_build.split())
|
||||
+if not 'PTX' in targets:
|
||||
+ config.unsupported = True
|
||||
--
|
||||
1.7.6.5
|
||||
|
Loading…
Reference in New Issue