forked from OSchip/llvm-project
[NVPTX] Handle __nvvm_reflect("__CUDA_ARCH").
Summary: Libdevice in recent CUDA versions relies on __nvvm_reflect("__CUDA_ARCH") to select GPU-specific bitcode. This patch makes the NVVMReflect pass replace that call with the target's SM version multiplied by 10 (e.g. 350 for sm_35). Reviewers: jlebar Subscribers: jholewinski, sanjoy, hiraditya, bixia, llvm-commits Differential Revision: https://reviews.llvm.org/D50207 llvm-svn: 338908
This commit is contained in:
parent
feb2a58860
commit
0a11b6366a
|
@ -46,7 +46,7 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
|
|||
ModulePass *createNVPTXAssignValidGlobalNamesPass();
|
||||
ModulePass *createGenericToNVVMPass();
|
||||
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
|
||||
FunctionPass *createNVVMReflectPass();
|
||||
FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
|
||||
MachineFunctionPass *createNVPTXPrologEpilogPass();
|
||||
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
|
||||
FunctionPass *createNVPTXImageOptimizerPass();
|
||||
|
|
|
@ -195,7 +195,7 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
|||
Builder.addExtension(
|
||||
PassManagerBuilder::EP_EarlyAsPossible,
|
||||
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
|
||||
PM.add(createNVVMReflectPass());
|
||||
PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
|
||||
PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
|
||||
});
|
||||
}
|
||||
|
@ -258,7 +258,8 @@ void NVPTXPassConfig::addIRPasses() {
|
|||
// it here does nothing. But since we need it for correctness when lowering
|
||||
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
|
||||
// call addEarlyAsPossiblePasses.
|
||||
addPass(createNVVMReflectPass());
|
||||
const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
|
||||
addPass(createNVVMReflectPass(ST.getSmVersion()));
|
||||
|
||||
if (getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createNVPTXImageOptimizerPass());
|
||||
|
|
|
@ -50,7 +50,9 @@ namespace {
|
|||
class NVVMReflect : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
NVVMReflect() : FunctionPass(ID) {
|
||||
unsigned int SmVersion;
|
||||
NVVMReflect() : NVVMReflect(0) {}
|
||||
explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) {
|
||||
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
@ -58,7 +60,9 @@ public:
|
|||
};
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
|
||||
// Factory for the NVVMReflect pass: heap-allocates a new NVVMReflect
// constructed with the given SmVersion and returns it as a FunctionPass*.
FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) {
|
||||
return new NVVMReflect(SmVersion);
|
||||
}
|
||||
|
||||
static cl::opt<bool>
|
||||
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
|
||||
|
@ -163,6 +167,8 @@ bool NVVMReflect::runOnFunction(Function &F) {
|
|||
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
|
||||
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
|
||||
ReflectVal = Flag->getSExtValue();
|
||||
} else if (ReflectArg == "__CUDA_ARCH") {
|
||||
ReflectVal = SmVersion * 10;
|
||||
}
|
||||
Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
|
||||
ToRemove.push_back(Call);
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
|
||||
; Verify that __nvvm_reflect() is replaced with an appropriate value.
|
||||
;
|
||||
; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 \
|
||||
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
|
||||
; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 -mcpu=sm_35 \
|
||||
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
|
||||
|
||||
@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
|
||||
|
||||
declare i32 @__nvvm_reflect(i8*)
|
||||
|
||||
; COMMON-LABEL: @foo
|
||||
define i32 @foo(float %a, float %b) {
|
||||
; COMMON-NOT: call i32 @__nvvm_reflect
|
||||
%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
|
||||
; SM20: ret i32 200
|
||||
; SM35: ret i32 350
|
||||
ret i32 %reflect
|
||||
}
|
||||
|
Loading…
Reference in New Issue