[NVPTX] Add GenericToNVVM IR converter to better handle idiomatic LLVM IR inputs

This converter currently only handles global variables in address space 0.
These variables are promoted to address space 1 (global memory), and all
uses are updated to point to the result of a cvta.global instruction on the new
variable.

The motivation for this is address space 0 global variables are illegal since we
cannot declare variables in the generic address space.  Instead, we place the
variables in address space 1 and explicitly convert the pointer to address
space 0. This is primarily intended to help new users who expect to be able to
place global variables in the default address space.

llvm-svn: 182254
This commit is contained in:
Justin Holewinski 2013-05-20 12:13:32 +00:00
parent 700b6fa934
commit 01f89f0428
8 changed files with 550 additions and 80 deletions

View File

@ -23,6 +23,7 @@ set(NVPTXCodeGen_sources
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
NVVMReflect.cpp
NVPTXGenericToNVVM.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})

View File

@ -62,6 +62,7 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
ModulePass *createGenericToNVVMPass();
bool isImageOrSamplerVal(const Value *, const Module *);

View File

@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
void DiscoverDependentGlobals(const Value *V,
DenseSet<const GlobalVariable *> &Globals) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
if (User *U = dyn_cast<User>(V)) {
if (const User *U = dyn_cast<User>(V)) {
for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
DiscoverDependentGlobals(U->getOperand(i), Globals);
}
@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
/// instances to be emitted, but only after any dependents have been added
/// first.
void VisitGlobalVariableForEmission(
GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
DenseSet<const GlobalVariable *> &Visited,
DenseSet<const GlobalVariable *> &Visiting) {
// Have we already visited this one?
if (Visited.count(GV))
return;
@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission(
Visiting.insert(GV);
// Make sure we visit all dependents first
DenseSet<GlobalVariable *> Others;
DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
E = Others.end();
for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
E = Others.end();
I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
if (!GlobalsEmitted) {
emitGlobals(*MF->getFunction()->getParent());
GlobalsEmitted = true;
}
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C,
return false;
}
void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
@ -921,6 +928,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
GlobalsEmitted = false;
return false; // success
}
void NVPTXAsmPrinter::emitGlobals(const Module &M) {
SmallString<128> Str2;
raw_svector_ostream OS2(Str2);
@ -931,13 +944,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
SmallVector<GlobalVariable *, 8> Globals;
DenseSet<GlobalVariable *> GVVisited;
DenseSet<GlobalVariable *> GVVisiting;
SmallVector<const GlobalVariable *, 8> Globals;
DenseSet<const GlobalVariable *> GVVisited;
DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
++I)
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
@ -951,7 +964,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
return false; // success
}
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
@ -989,6 +1001,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
// If we did not emit any functions, then the global declarations have not
// yet been emitted.
if (!GlobalsEmitted) {
emitGlobals(M);
GlobalsEmitted = true;
}
// XXX Temproarily remove global variables so that doFinalization() will not
// emit them again (global variables are emitted at beginning).
@ -1063,7 +1083,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
}
}
void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
raw_ostream &O,
bool processDemoted) {
// Skip meta data
@ -1107,10 +1128,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (llvm::isSampler(*GVar)) {
O << ".global .samplerref " << llvm::getSamplerName(*GVar);
Constant *Initializer = NULL;
const Constant *Initializer = NULL;
if (GVar->hasInitializer())
Initializer = GVar->getInitializer();
ConstantInt *CI = NULL;
const ConstantInt *CI = NULL;
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
@ -1183,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
std::vector<GlobalVariable *> temp;
std::vector<const GlobalVariable *> temp;
temp.push_back(GVar);
localDecls[demotedFunc] = temp;
}
@ -1213,7 +1234,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
const Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
O << " = ";
printScalarConstant(Initializer, O);
@ -1237,7 +1258,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
const Constant *Initializer = GVar->getInitializer();
if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
@ -1287,7 +1308,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
if (localDecls.find(f) == localDecls.end())
return;
std::vector<GlobalVariable *> &gvars = localDecls[f];
std::vector<const GlobalVariable *> &gvars = localDecls[f];
for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
@ -1761,12 +1782,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
O << utohexstr(API.getZExtValue());
}
void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
O << CI->getValue();
return;
}
if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
printFPConstant(CFP, O);
return;
}
@ -1774,13 +1795,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
O << "0";
return;
}
if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
O << *Mang->getSymbol(GVar);
return;
}
if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
Value *v = Cexpr->stripPointerCasts();
if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
O << *Mang->getSymbol(GVar);
return;
} else {
@ -1791,7 +1812,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
@ -1819,13 +1840,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
@ -1841,13 +1862,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
llvm_unreachable("unsupported integer const type");
} else if (ETy == Type::getInt64Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
@ -1868,7 +1889,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
case Type::FloatTyID:
case Type::DoubleTyID: {
ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
@ -1884,10 +1905,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
break;
}
case Type::PointerTyID: {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
Value *v = Cexpr->stripPointerCasts();
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
unsigned int s = TD->getTypeAllocSize(CPV->getType());
@ -1916,7 +1937,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
int Bytes;

View File

@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
SmallVector<Value *, 4> Symbols;
SmallVector<const Value *, 4> Symbols;
private:
unsigned curpos;
@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
return curpos;
}
void addSymbol(Value *GVar) {
void addSymbol(const Value *GVar) {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
numSymbols++;
@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
Value *v = Symbols[nSym];
if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
const Value *v = Symbols[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
@ -205,10 +205,12 @@ private:
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
@ -234,6 +236,8 @@ protected:
private:
std::string CurrentBankselLabelInBasicBlock;
bool GlobalsEmitted;
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
@ -247,7 +251,7 @@ private:
std::map<const Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
// To record filename to ID mapping
std::map<std::string, unsigned> filenameMap;
@ -256,15 +260,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
void printScalarConstant(Constant *CPV, raw_ostream &O);
void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(Module &, raw_ostream &O);
void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);

View File

@ -0,0 +1,436 @@
//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
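// Convert global variables in the generic address space into variables in the
// global address space (.global). Uses inside functions are rewritten to go
// through an nvvm.ptr.global.to.gen (cvta) conversion of the new variable.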
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/PassManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
namespace llvm {
// Forward declaration of the registration hook for the GenericToNVVM pass.
// NOTE(review): the definition is presumably generated by the INITIALIZE_PASS
// macro invoked later in this file -- confirm.
void initializeGenericToNVVMPass(PassRegistry &);
}
namespace {
// Module pass that clones eligible global variables from the generic address
// space into the global address space and rewrites their users: instruction
// operands, named metadata and global variable initializers.
class GenericToNVVM : public ModulePass {
public:
// Pass identifier handed to the ModulePass constructor.
static char ID;
GenericToNVVM() : ModulePass(ID) {}
// Runs the conversion on M. Returns true if at least one global variable was
// cloned, false if the module was left untouched.
virtual bool runOnModule(Module &M);
// Intentionally empty: no analyses are requested or declared preserved.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
private:
// Emits (through Builder) the conversion of the global-address-space
// variable GV to a generic pointer and returns the resulting value.
Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
IRBuilder<> &Builder);
// Returns the value to use in place of constant C inside function F; results
// are memoized in ConstantToValueMap.
Value *remapConstant(Module *M, Function *F, Constant *C,
IRBuilder<> &Builder);
// Rebuilds a constant vector/array/struct with instructions when any of its
// elements had to be remapped; otherwise returns C unchanged.
Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
Constant *C,
IRBuilder<> &Builder);
// Rebuilds a constant expression as an instruction when any of its operands
// had to be remapped; otherwise returns C unchanged.
Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
IRBuilder<> &Builder);
// Replaces the operands of N with their remapped counterparts if any changed.
void remapNamedMDNode(Module *M, NamedMDNode *N);
// Returns N itself, or a new node whose references to variables in GVMap have
// been replaced by the corresponding clones.
MDNode *remapMDNode(Module *M, MDNode *N);
typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
// Original generic-address-space variable -> its global-address-space clone.
GVMapTy GVMap;
// Per-function cache of already remapped constants (cleared by runOnModule
// after each function).
ConstantToValueMapTy ConstantToValueMap;
};
}
// Definition of the pass identifier.
char GenericToNVVM::ID = 0;
// Factory: returns a newly allocated GenericToNVVM pass instance.
ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
// Registers the pass under the name "generic-to-nvvm" with the description
// below; both trailing flags are false.
// NOTE(review): the flags are presumably "CFG only" and "is analysis" as in
// the INITIALIZE_PASS macro signature -- confirm.
INITIALIZE_PASS(
GenericToNVVM, "generic-to-nvvm",
"Ensure that the global variables are in the global address space", false,
false)
// Pass entry point. Works in four steps:
//   1. clone every eligible generic-address-space global variable into the
//      global address space and record the (original, clone) pair in GVMap;
//   2. rewrite constant operands of instructions in every function definition
//      via remapConstant();
//   3. remap named metadata via remapNamedMDNode();
//   4. redirect the remaining uses of each original to a bitcast of its clone,
//      erase the original and give its name to the clone.
// Returns false when no variable needed cloning, true otherwise.
bool GenericToNVVM::runOnModule(Module &M) {
// Create a clone of each global variable that has the default address space.
// The clone is created with the global address space specifier, and the pair
// of original global variable and its clone is placed in the GVMap for later
// use.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E;) {
// Advance the iterator before the global list is modified by the clone
// created below.
GlobalVariable *GV = I++;
// Textures, surfaces and "llvm."-prefixed variables are left untouched.
if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
!GV->getName().startswith("llvm.")) {
// The clone is created unnamed; it takes over GV's name at the end of this
// function, once GV has been erased.
GlobalVariable *NewGV = new GlobalVariable(
M, GV->getType()->getElementType(), GV->isConstant(),
GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
"", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
NewGV->copyAttributesFrom(GV);
GVMap[GV] = NewGV;
}
}
// Return immediately, if every global variable has a specific address space
// specifier.
if (GVMap.empty()) {
return false;
}
// Walk through the instructions in function definitions, and replace any use
// of original global variables in GVMap with a use of the corresponding
// copies in GVMap. If necessary, promote constants to instructions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (I->isDeclaration()) {
continue;
}
// All replacement instructions are emitted at the top of the entry block
// (before its first non-PHI, non-debug instruction).
IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
++BBI) {
for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
++II) {
for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
Value *Operand = II->getOperand(i);
if (isa<Constant>(Operand)) {
II->setOperand(
i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
}
}
}
}
// The cache holds values created inside this function only, so it must not
// leak into the next function.
ConstantToValueMap.clear();
}
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
for (Module::named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; I++) {
remapNamedMDNode(&M, I);
}
// Walk through the global variable initializers, and replace any use of
// original global variables in GVMap with a use of the corresponding copies
// in GVMap. The copies need to be bitcast to the original global variable
// types, as we cannot use cvta in global variable initializers.
for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
GlobalVariable *GV = I->first;
GlobalVariable *NewGV = I->second;
// Step past this entry before GV is erased below.
// NOTE(review): presumably the ValueMap drops the entry when its key is
// deleted, which would invalidate I -- confirm.
++I;
Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
// At this point, the remaining uses of GV should be found only in global
// variable initializers, as other uses have already been removed
// while walking through the instructions in function definitions.
for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
UI != UE;) {
// Post-increment: setting the use unlinks it from GV's use list.
Use &U = (UI++).getUse();
U.set(BitCastNewGV);
}
// Copy the name out before GV is destroyed so the clone can take it over.
std::string Name = GV->getName();
GV->removeDeadConstantUsers();
GV->eraseFromParent();
NewGV->setName(Name);
}
GVMap.clear();
return true;
}
// Emits, through Builder, the conversion of the global-address-space variable
// GV into a pointer in the generic address space and returns that pointer.
//
// When the pointee is an integer or floating-point type, a single call to
// nvvm.ptr.global.to.gen is emitted. For any other pointee type the pointer is
// first bitcast to i8 addrspace(n)*, converted, and then bitcast back to a
// generic pointer to the original element type.
Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
                                      GlobalVariable *GV,
                                      IRBuilder<> &Builder) {
  PointerType *GlobalPtrTy = GV->getType();
  Type *ElemTy = GlobalPtrTy->getElementType();
  Type *GenericElemPtrTy =
      PointerType::get(ElemTy, llvm::ADDRESS_SPACE_GENERIC);

  EVT ElemVT = EVT::getEVT(ElemTy, true);
  if (ElemVT.isInteger() || ElemVT.isFloatingPoint()) {
    // Simple case: the intrinsic can be applied to the variable directly.
    Type *Overloads[] = { GenericElemPtrTy, GlobalPtrTy };
    Function *Convert = Intrinsic::getDeclaration(
        M, Intrinsic::nvvm_ptr_global_to_gen, Overloads);
    return Builder.CreateCall(Convert, GV, "cvta");
  }

  // General case: route the conversion through i8 pointers.
  Type *ByteTy = Type::getInt8Ty(M->getContext());
  Type *GlobalBytePtrTy =
      PointerType::get(ByteTy, GlobalPtrTy->getAddressSpace());
  Value *Ptr = Builder.CreateBitCast(GV, GlobalBytePtrTy, "cvta");

  Type *GenericBytePtrTy =
      PointerType::get(ByteTy, llvm::ADDRESS_SPACE_GENERIC);
  Type *Overloads[] = { GenericBytePtrTy, GlobalBytePtrTy };
  Function *Convert = Intrinsic::getDeclaration(
      M, Intrinsic::nvvm_ptr_global_to_gen, Overloads);
  Ptr = Builder.CreateCall(Convert, Ptr, "cvta");

  // Restore the original element type, now in the generic address space.
  return Builder.CreateBitCast(Ptr, GenericElemPtrTy, "cvta");
}
// Returns the value that must be used in place of constant C inside function
// F. Constants that do not involve any variable in GVMap map to themselves.
// Every result is memoized in ConstantToValueMap so that a constant is
// converted at most once per function.
Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
                                    IRBuilder<> &Builder) {
  // Reuse an earlier conversion of C, if there is one.
  ConstantToValueMapTy::iterator Cached = ConstantToValueMap.find(C);
  if (Cached != ConstantToValueMap.end())
    return Cached->second;

  Value *Result = C;
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
    // A variable recorded in GVMap is going to be erased from the module, so
    // it cannot be referenced any more. Its clone lives in the global address
    // space and using it directly would change the types of the surrounding
    // instructions; hence the clone is converted to a generic pointer.
    GVMapTy::iterator Entry = GVMap.find(GV);
    if (Entry != GVMap.end())
      Result = getOrInsertCVTA(M, F, Entry->second, Builder);
  } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) ||
             isa<ConstantStruct>(C)) {
    // Vectors and aggregates are rebuilt with instructions if any element is
    // or uses a variable in GVMap.
    Result = remapConstantVectorOrConstantAggregate(M, F, C, Builder);
  } else if (ConstantExpr *Expr = dyn_cast<ConstantExpr>(C)) {
    // Constant expressions are rebuilt with instructions if any operand is or
    // uses a variable in GVMap.
    Result = remapConstantExpr(M, F, Expr, Builder);
  }

  ConstantToValueMap[C] = Result;
  return Result;
}
// Remaps every element of the constant vector, array or struct C. If no
// element changed, C itself is returned. Otherwise an equivalent value is
// assembled, starting from undef, with one insertelement (vectors) or
// insertvalue (arrays and structs) instruction per element.
Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
    Module *M, Function *F, Constant *C, IRBuilder<> &Builder) {
  const unsigned Count = C->getNumOperands();
  SmallVector<Value *, 4> Remapped;
  bool AnyDiffers = false;

  // First pass: remap the elements and remember whether any of them changed.
  for (unsigned Idx = 0; Idx != Count; ++Idx) {
    Value *Element = C->getOperand(Idx);
    Value *Mapped = remapConstant(M, F, cast<Constant>(Element), Builder);
    if (Mapped != Element)
      AnyDiffers = true;
    Remapped.push_back(Mapped);
  }

  if (!AnyDiffers)
    return C;

  // Second pass: build the replacement value element by element.
  const bool IsVector = isa<ConstantVector>(C);
  Value *Rebuilt = UndefValue::get(C->getType());
  for (unsigned Idx = 0; Idx != Count; ++Idx) {
    if (IsVector) {
      Value *Lane = ConstantInt::get(Type::getInt32Ty(M->getContext()), Idx);
      Rebuilt = Builder.CreateInsertElement(Rebuilt, Remapped[Idx], Lane);
    } else {
      Rebuilt =
          Builder.CreateInsertValue(Rebuilt, Remapped[Idx], makeArrayRef(Idx));
    }
  }
  return Rebuilt;
}
// Remaps every operand of the constant expression C. If no operand changed, C
// itself is returned. Otherwise the expression is re-created as an instruction
// (emitted through Builder) with the same opcode and the remapped operands.
//
// Unsupported opcodes, and fcmp (which is not expected to be affected by an
// address space conversion), trigger an assertion; in builds without
// assertions C is returned unchanged for them.
Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
                                        IRBuilder<> &Builder) {
  bool OperandChanged = false;
  SmallVector<Value *, 4> NewOperands;
  unsigned NumOperands = C->getNumOperands();

  // Check if any operand is or uses a global variable in GVMap, and thus
  // converted to another value.
  for (unsigned i = 0; i < NumOperands; ++i) {
    Value *Operand = C->getOperand(i);
    Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
    OperandChanged |= Operand != NewOperand;
    NewOperands.push_back(NewOperand);
  }

  // If none of the operands has been modified, return C as it is.
  if (!OperandChanged) {
    return C;
  }

  // If any of the operands has been modified, construct the instruction with
  // the converted operands.
  unsigned Opcode = C->getOpcode();
  switch (Opcode) {
  case Instruction::ICmp:
    // CompareConstantExpr (icmp)
    return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()),
                              NewOperands[0], NewOperands[1]);
  case Instruction::FCmp:
    // CompareConstantExpr (fcmp)
    assert(false && "Address space conversion should have no effect "
                    "on float point CompareConstantExpr (fcmp)!");
    return C;
  case Instruction::ExtractElement:
    // ExtractElementConstantExpr
    return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
  case Instruction::InsertElement:
    // InsertElementConstantExpr
    return Builder.CreateInsertElement(NewOperands[0], NewOperands[1],
                                       NewOperands[2]);
  case Instruction::ShuffleVector:
    // ShuffleVector
    return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
                                       NewOperands[2]);
  case Instruction::ExtractValue:
    // ExtractValueConstantExpr
    return Builder.CreateExtractValue(NewOperands[0], C->getIndices());
  case Instruction::InsertValue:
    // InsertValueConstantExpr
    return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
                                     C->getIndices());
  case Instruction::GetElementPtr:
    // GetElementPtrConstantExpr
    // Carry the inbounds flag over unchanged: an inbounds expression becomes
    // an inbounds GEP, any other expression becomes a plain GEP. Marking a
    // non-inbounds expression as inbounds would add a guarantee the original
    // never made.
    return cast<GEPOperator>(C)->isInBounds()
               ? Builder.CreateInBoundsGEP(
                     NewOperands[0],
                     makeArrayRef(&NewOperands[1], NumOperands - 1))
               : Builder.CreateGEP(
                     NewOperands[0],
                     makeArrayRef(&NewOperands[1], NumOperands - 1));
  case Instruction::Select:
    // SelectConstantExpr
    return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]);
  default:
    // BinaryConstantExpr
    if (Instruction::isBinaryOp(Opcode)) {
      return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()),
                                 NewOperands[0], NewOperands[1]);
    }
    // UnaryConstantExpr
    if (Instruction::isCast(Opcode)) {
      return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
                                NewOperands[0], C->getType());
    }
    assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
    return C;
  }
}
// Remaps every operand of the named metadata node N through remapMDNode().
// N is left untouched when no operand changed; otherwise all of its operands
// are dropped and the remapped ones are appended in the original order.
void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
  SmallVector<MDNode *, 16> Remapped;
  bool AnyDiffers = false;

  for (unsigned Idx = 0, Count = N->getNumOperands(); Idx != Count; ++Idx) {
    MDNode *Old = N->getOperand(Idx);
    MDNode *Fresh = remapMDNode(M, Old);
    if (Fresh != Old)
      AnyDiffers = true;
    Remapped.push_back(Fresh);
  }

  // Nothing refers to a remapped variable: keep N as it is.
  if (!AnyDiffers)
    return;

  // Swap the old operand list for the remapped one.
  N->dropAllReferences();
  for (unsigned Idx = 0, Count = Remapped.size(); Idx != Count; ++Idx)
    N->addOperand(Remapped[Idx]);
}
// Returns N unchanged if none of its operands (recursively) refers to a
// variable in GVMap. Otherwise returns a new MDNode in which each such
// variable is replaced by its clone and the operand that follows it, if any,
// is replaced by an i32 holding the clone's address space.
MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
  const unsigned Count = N->getNumOperands();
  SmallVector<Value *, 8> Rebuilt;
  bool AnyDiffers = false;

  for (unsigned Idx = 0; Idx < Count; ++Idx) {
    Value *Old = N->getOperand(Idx);
    Value *Replacement = Old;

    if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Old)) {
      GVMapTy::iterator Found = GVMap.find(GV);
      if (Found != GVMap.end()) {
        GlobalVariable *Clone = Found->second;
        Replacement = Clone;
        // Consume the operand after the variable as well (when there is one).
        ++Idx;
        if (Idx < Count) {
          Rebuilt.push_back(Clone);
          // Address space of the global variable follows the global variable
          // in the global variable debug info (see createGlobalVariable in
          // lib/Analysis/DIBuilder.cpp).
          Replacement = ConstantInt::get(Type::getInt32Ty(M->getContext()),
                                         Clone->getType()->getAddressSpace());
        }
      }
    } else if (MDNode *Nested = dyn_cast_or_null<MDNode>(Old)) {
      // Nested metadata nodes are remapped recursively.
      Replacement = remapMDNode(M, Nested);
    }

    if (Replacement != Old)
      AnyDiffers = true;
    Rebuilt.push_back(Replacement);
  }

  // Without any change the original node can be reused.
  if (!AnyDiffers)
    return N;

  return MDNode::get(M->getContext(), makeArrayRef(Rebuilt));
}

View File

@ -1510,38 +1510,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
"mov.u32 \t$result, $src;",
[(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
"mov.u64 \t$result, $src;",
[(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
// @TODO: Revisit this. There is a type
// contradiction between iPTRAny and iPTR for the def.
/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
"mov.u32 \t$result, $src;",
[(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
(Wrapper tglobaladdr:$src)))]>;
def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
"mov.u64 \t$result, $src;",
[(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
(Wrapper tglobaladdr:$src)))]>;*/
def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
"mov.u32 \t$result, $src;",
[(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
"mov.u64 \t$result, $src;",
[(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
// nvvm.ptr.gen.to.param

View File

@ -49,6 +49,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(
@ -102,6 +104,7 @@ public:
return getTM<NVPTXTargetMachine>();
}
virtual void addIRPasses();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
};
@ -112,6 +115,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
void NVPTXPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createGenericToNVVMPass());
}
bool NVPTXPassConfig::addInstSelector() {
addPass(createLowerAggrCopies());
addPass(createSplitBBatBarPass());

View File

@ -0,0 +1,25 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; Ensure global variables in address space 0 are promoted to address space 1
; CHECK: .global .align 4 .u32 myglobal = 42;
@myglobal = internal global i32 42, align 4
; CHECK: .global .align 4 .u32 myconst = 42;
@myconst = internal constant i32 42, align 4
; Loads from both address-space-0 globals and stores the loaded values through
; %a and %b. Each load is expected to be preceded by a cvta.global conversion
; of the promoted variable.
define void @foo(i32* %a, i32* %b) {
; CHECK: cvta.global.u32
%ld1 = load i32* @myglobal
; CHECK: cvta.global.u32
%ld2 = load i32* @myconst
store i32 %ld1, i32* %a
store i32 %ld2, i32* %b
ret void
}
!nvvm.annotations = !{!0}
!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1}