forked from OSchip/llvm-project
CodeGen: clang-format goodness
The changed files are not yet clang-format clean, but we are getting close. llvm-svn: 174403
This commit is contained in:
parent
dc69f6fbca
commit
c14582f276
|
@ -31,15 +31,13 @@ using namespace llvm;
|
|||
using namespace polly;
|
||||
|
||||
static cl::opt<bool>
|
||||
Aligned("enable-polly-aligned",
|
||||
cl::desc("Assumed aligned memory accesses."), cl::Hidden,
|
||||
cl::value_desc("OpenMP code generation enabled if true"),
|
||||
Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."),
|
||||
cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"),
|
||||
cl::init(false), cl::ZeroOrMore);
|
||||
|
||||
static cl::opt<bool>
|
||||
SCEVCodegen("polly-codegen-scev",
|
||||
cl::desc("Use SCEV based code generation."), cl::Hidden,
|
||||
cl::init(false), cl::ZeroOrMore);
|
||||
SCEVCodegen("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
|
||||
cl::Hidden, cl::init(false), cl::ZeroOrMore);
|
||||
|
||||
/// The SCEVRewriter takes a scalar evolution expression and updates the
|
||||
/// following components:
|
||||
|
@ -81,7 +79,7 @@ SCEVCodegen("polly-codegen-scev",
|
|||
/// - Instructions that reference operands already calculated within the
|
||||
/// basic block.
|
||||
/// - Store instructions
|
||||
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
|
||||
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV *> {
|
||||
public:
|
||||
static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
|
||||
ValueMapT &GlobalMap, ValueMapT &BBMap) {
|
||||
|
@ -114,13 +112,10 @@ public:
|
|||
return Expr;
|
||||
}
|
||||
|
||||
|
||||
return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
|
||||
return SCEVVisitor<SCEVRewriter, const SCEV *>::visit(Expr);
|
||||
}
|
||||
|
||||
const SCEV *visitConstant(const SCEVConstant *Constant) {
|
||||
return Constant;
|
||||
}
|
||||
const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; }
|
||||
|
||||
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
|
||||
const SCEV *Operand = visit(Expr->getOperand());
|
||||
|
@ -364,13 +359,13 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
|
|||
// We assume constants never change.
|
||||
// This avoids map lookups for many calls to this function.
|
||||
if (isa<Constant>(Old))
|
||||
return const_cast<Value*>(Old);
|
||||
return const_cast<Value *>(Old);
|
||||
|
||||
if (GlobalMap.count(Old)) {
|
||||
Value *New = GlobalMap[Old];
|
||||
|
||||
if (Old->getType()->getScalarSizeInBits()
|
||||
< New->getType()->getScalarSizeInBits())
|
||||
if (Old->getType()->getScalarSizeInBits() <
|
||||
New->getType()->getScalarSizeInBits())
|
||||
New = Builder.CreateTruncOrBitCast(New, Old->getType());
|
||||
|
||||
return New;
|
||||
|
@ -381,11 +376,10 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
|
|||
}
|
||||
|
||||
if (SCEVCodegen && SE.isSCEVable(Old->getType()))
|
||||
if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
|
||||
if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old)))
|
||||
if (!isa<SCEVCouldNotCompute>(Scev)) {
|
||||
const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
|
||||
*Statement.getParent(), SE,
|
||||
GlobalMap, BBMap);
|
||||
const SCEV *NewScev = SCEVRewriter::rewrite(
|
||||
Scev, *Statement.getParent(), SE, GlobalMap, BBMap);
|
||||
SCEVExpander Expander(SE, "polly");
|
||||
Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
|
||||
Builder.GetInsertPoint());
|
||||
|
@ -405,7 +399,7 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
|
|||
|
||||
// Everything else is probably a scop-constant value defined as global,
|
||||
// function parameter or an instruction not within the scop.
|
||||
return const_cast<Value*>(Old);
|
||||
return const_cast<Value *>(Old);
|
||||
}
|
||||
|
||||
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
|
||||
|
@ -414,13 +408,14 @@ void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
|
|||
|
||||
// Replace old operands with the new ones.
|
||||
for (Instruction::const_op_iterator OI = Inst->op_begin(),
|
||||
OE = Inst->op_end(); OI != OE; ++OI) {
|
||||
OE = Inst->op_end();
|
||||
OI != OE; ++OI) {
|
||||
Value *OldOperand = *OI;
|
||||
Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
|
||||
|
||||
if (!NewOperand) {
|
||||
assert(!isa<StoreInst>(NewInst)
|
||||
&& "Store instructions are always needed!");
|
||||
assert(!isa<StoreInst>(NewInst) &&
|
||||
"Store instructions are always needed!");
|
||||
delete NewInst;
|
||||
return;
|
||||
}
|
||||
|
@ -435,9 +430,9 @@ void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
|
|||
NewInst->setName("p_" + Inst->getName());
|
||||
}
|
||||
|
||||
std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
|
||||
__isl_keep isl_map *AccessRelation, Value *BaseAddress,
|
||||
ValueMapT &BBMap, ValueMapT &GlobalMap) {
|
||||
std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
|
||||
__isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
|
||||
ValueMapT &GlobalMap) {
|
||||
|
||||
assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
|
||||
"Only single dimensional access functions supported");
|
||||
|
@ -456,7 +451,7 @@ std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
|
|||
Type *Ty = Builder.getInt64Ty();
|
||||
OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);
|
||||
|
||||
std::vector<Value*> IndexArray;
|
||||
std::vector<Value *> IndexArray;
|
||||
Value *NullValue = Constant::getNullValue(Ty);
|
||||
IndexArray.push_back(NullValue);
|
||||
IndexArray.push_back(OffsetValue);
|
||||
|
@ -464,19 +459,17 @@ std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
|
|||
}
|
||||
|
||||
Value *BlockGenerator::getNewAccessOperand(
|
||||
__isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
|
||||
ValueMapT &BBMap, ValueMapT &GlobalMap) {
|
||||
std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
|
||||
BaseAddress,
|
||||
BBMap, GlobalMap);
|
||||
Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
|
||||
"p_newarrayidx_");
|
||||
__isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
|
||||
ValueMapT &GlobalMap) {
|
||||
std::vector<Value *> IndexArray =
|
||||
getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
|
||||
Value *NewOperand =
|
||||
Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
|
||||
return NewOperand;
|
||||
}
|
||||
|
||||
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
|
||||
const Value *Pointer,
|
||||
ValueMapT &BBMap,
|
||||
Value *BlockGenerator::generateLocationAccessed(
|
||||
const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
|
||||
ValueMapT &GlobalMap) {
|
||||
MemoryAccess &Access = Statement.getAccessFor(Inst);
|
||||
isl_map *CurrentAccessRelation = Access.getAccessRelation();
|
||||
|
@ -490,9 +483,9 @@ Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
|
|||
if (!NewAccessRelation) {
|
||||
NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
|
||||
} else {
|
||||
Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
|
||||
NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
|
||||
BBMap, GlobalMap);
|
||||
Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
|
||||
NewPointer =
|
||||
getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
|
||||
}
|
||||
|
||||
isl_map_free(CurrentAccessRelation);
|
||||
|
@ -500,23 +493,21 @@ Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
|
|||
return NewPointer;
|
||||
}
|
||||
|
||||
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
|
||||
ValueMapT &BBMap,
|
||||
ValueMapT &GlobalMap) {
|
||||
Value *BlockGenerator::generateScalarLoad(
|
||||
const LoadInst *Load, ValueMapT &BBMap, ValueMapT &GlobalMap) {
|
||||
const Value *Pointer = Load->getPointerOperand();
|
||||
const Instruction *Inst = dyn_cast<Instruction>(Load);
|
||||
Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
|
||||
Value *ScalarLoad = Builder.CreateLoad(NewPointer,
|
||||
Load->getName() + "_p_scalar_");
|
||||
Value *ScalarLoad =
|
||||
Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
|
||||
return ScalarLoad;
|
||||
}
|
||||
|
||||
Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
|
||||
ValueMapT &BBMap,
|
||||
ValueMapT &GlobalMap) {
|
||||
Value *BlockGenerator::generateScalarStore(
|
||||
const StoreInst *Store, ValueMapT &BBMap, ValueMapT &GlobalMap) {
|
||||
const Value *Pointer = Store->getPointerOperand();
|
||||
Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
|
||||
GlobalMap);
|
||||
Value *NewPointer =
|
||||
generateLocationAccessed(Store, Pointer, BBMap, GlobalMap);
|
||||
Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
|
||||
|
||||
return Builder.CreateStore(ValueOperand, NewPointer);
|
||||
|
@ -547,8 +538,8 @@ void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
|
|||
|
||||
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
|
||||
BasicBlock *BB = Statement.getBasicBlock();
|
||||
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
|
||||
Builder.GetInsertPoint(), P);
|
||||
BasicBlock *CopyBB =
|
||||
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
|
||||
CopyBB->setName("polly.stmt." + BB->getName());
|
||||
Builder.SetInsertPoint(CopyBB->begin());
|
||||
|
||||
|
@ -559,19 +550,16 @@ void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
|
|||
copyInstruction(II, BBMap, GlobalMap);
|
||||
}
|
||||
|
||||
VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
|
||||
VectorValueMapT &GlobalMaps,
|
||||
ScopStmt &Stmt,
|
||||
__isl_keep isl_map *Schedule,
|
||||
Pass *P)
|
||||
VectorBlockGenerator::VectorBlockGenerator(
|
||||
IRBuilder<> &B, VectorValueMapT &GlobalMaps, ScopStmt &Stmt,
|
||||
__isl_keep isl_map *Schedule, Pass *P)
|
||||
: BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
|
||||
assert(GlobalMaps.size() > 1 && "Only one vector lane found");
|
||||
assert(Schedule && "No statement domain provided");
|
||||
}
|
||||
|
||||
Value *VectorBlockGenerator::getVectorValue(const Value *Old,
|
||||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
Value *VectorBlockGenerator::getVectorValue(
|
||||
const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
|
||||
if (VectorMap.count(Old))
|
||||
return VectorMap[Old];
|
||||
|
||||
|
@ -580,10 +568,8 @@ Value *VectorBlockGenerator::getVectorValue(const Value *Old,
|
|||
Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
|
||||
|
||||
for (int Lane = 0; Lane < Width; Lane++)
|
||||
Vector = Builder.CreateInsertElement(Vector,
|
||||
getNewValue(Old,
|
||||
ScalarMaps[Lane],
|
||||
GlobalMaps[Lane]),
|
||||
Vector = Builder.CreateInsertElement(
|
||||
Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane]),
|
||||
Builder.getInt32(Lane));
|
||||
|
||||
VectorMap[Old] = Vector;
|
||||
|
@ -606,10 +592,10 @@ Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
|
|||
const Value *Pointer = Load->getPointerOperand();
|
||||
Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
|
||||
Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
|
||||
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
|
||||
"vector_ptr");
|
||||
LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
|
||||
Load->getName() + "_p_vec_full");
|
||||
Value *VectorPtr =
|
||||
Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
|
||||
LoadInst *VecLoad =
|
||||
Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
|
||||
if (!Aligned)
|
||||
VecLoad->setAlignment(8);
|
||||
|
||||
|
@ -623,25 +609,22 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
|
|||
Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
|
||||
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
|
||||
Load->getName() + "_p_vec_p");
|
||||
LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
|
||||
Load->getName() + "_p_splat_one");
|
||||
LoadInst *ScalarLoad =
|
||||
Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
|
||||
|
||||
if (!Aligned)
|
||||
ScalarLoad->setAlignment(8);
|
||||
|
||||
Constant *SplatVector =
|
||||
Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
|
||||
getVectorWidth()));
|
||||
Constant *SplatVector = Constant::getNullValue(
|
||||
VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
|
||||
|
||||
Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
|
||||
SplatVector,
|
||||
Load->getName()
|
||||
+ "_p_splat");
|
||||
Value *VectorLoad = Builder.CreateShuffleVector(
|
||||
ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
|
||||
return VectorLoad;
|
||||
}
|
||||
|
||||
Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
Value *VectorBlockGenerator::generateUnknownStrideLoad(
|
||||
const LoadInst *Load, VectorValueMapT &ScalarMaps) {
|
||||
int VectorWidth = getVectorWidth();
|
||||
const Value *Pointer = Load->getPointerOperand();
|
||||
VectorType *VectorType = VectorType::get(
|
||||
|
@ -651,24 +634,22 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
|
|||
|
||||
for (int i = 0; i < VectorWidth; i++) {
|
||||
Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
|
||||
Value *ScalarLoad = Builder.CreateLoad(NewPointer,
|
||||
Load->getName() + "_p_scalar_");
|
||||
Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
|
||||
Builder.getInt32(i),
|
||||
Load->getName() + "_p_vec_");
|
||||
Value *ScalarLoad =
|
||||
Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
|
||||
Vector = Builder.CreateInsertElement(
|
||||
Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
|
||||
}
|
||||
|
||||
return Vector;
|
||||
}
|
||||
|
||||
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
|
||||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
void VectorBlockGenerator::generateLoad(
|
||||
const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
|
||||
if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
|
||||
!VectorType::isValidElementType(Load->getType())) {
|
||||
for (int i = 0; i < getVectorWidth(); i++)
|
||||
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
|
||||
GlobalMaps[i]);
|
||||
ScalarMaps[i][Load] =
|
||||
generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -689,8 +670,8 @@ void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
|
|||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
int VectorWidth = getVectorWidth();
|
||||
Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
|
||||
ScalarMaps);
|
||||
Value *NewOperand =
|
||||
getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps);
|
||||
|
||||
assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
|
||||
|
||||
|
@ -714,23 +695,22 @@ void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
|
|||
VectorMap[Inst] = NewInst;
|
||||
}
|
||||
|
||||
void VectorBlockGenerator::copyStore(const StoreInst *Store,
|
||||
ValueMapT &VectorMap,
|
||||
VectorValueMapT &ScalarMaps) {
|
||||
void VectorBlockGenerator::copyStore(
|
||||
const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
|
||||
int VectorWidth = getVectorWidth();
|
||||
|
||||
MemoryAccess &Access = Statement.getAccessFor(Store);
|
||||
|
||||
const Value *Pointer = Store->getPointerOperand();
|
||||
Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
|
||||
ScalarMaps);
|
||||
Value *Vector =
|
||||
getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps);
|
||||
|
||||
if (Access.isStrideOne(isl_map_copy(Schedule))) {
|
||||
Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
|
||||
Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
|
||||
|
||||
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
|
||||
"vector_ptr");
|
||||
Value *VectorPtr =
|
||||
Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
|
||||
StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
|
||||
|
||||
if (!Aligned)
|
||||
|
@ -747,7 +727,8 @@ void VectorBlockGenerator::copyStore(const StoreInst *Store,
|
|||
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
|
||||
ValueMapT &VectorMap) {
|
||||
for (Instruction::const_op_iterator OI = Inst->op_begin(),
|
||||
OE = Inst->op_end(); OI != OE; ++OI)
|
||||
OE = Inst->op_end();
|
||||
OI != OE; ++OI)
|
||||
if (VectorMap.count(*OI))
|
||||
return true;
|
||||
return false;
|
||||
|
@ -760,7 +741,8 @@ bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
|
|||
int VectorWidth = getVectorWidth();
|
||||
|
||||
for (Instruction::const_op_iterator OI = Inst->op_begin(),
|
||||
OE = Inst->op_end(); OI != OE; ++OI) {
|
||||
OE = Inst->op_end();
|
||||
OI != OE; ++OI) {
|
||||
ValueMapT::iterator VecOp = VectorMap.find(*OI);
|
||||
|
||||
if (VecOp == VectorMap.end())
|
||||
|
@ -810,9 +792,7 @@ void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
|
|||
VectorMap[Inst] = Vector;
|
||||
}
|
||||
|
||||
int VectorBlockGenerator::getVectorWidth() {
|
||||
return GlobalMaps.size();
|
||||
}
|
||||
int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
|
||||
|
||||
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
|
||||
ValueMapT &VectorMap,
|
||||
|
@ -855,8 +835,8 @@ void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
|
|||
|
||||
void VectorBlockGenerator::copyBB() {
|
||||
BasicBlock *BB = Statement.getBasicBlock();
|
||||
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
|
||||
Builder.GetInsertPoint(), P);
|
||||
BasicBlock *CopyBB =
|
||||
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
|
||||
CopyBB->setName("polly.stmt." + BB->getName());
|
||||
Builder.SetInsertPoint(CopyBB->begin());
|
||||
|
||||
|
@ -877,7 +857,7 @@ void VectorBlockGenerator::copyBB() {
|
|||
VectorValueMapT ScalarBlockMap(getVectorWidth());
|
||||
ValueMapT VectorBlockMap;
|
||||
|
||||
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
|
||||
II != IE; ++II)
|
||||
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
|
||||
++II)
|
||||
copyInstruction(II, VectorBlockMap, ScalarBlockMap);
|
||||
}
|
||||
|
|
|
@ -97,9 +97,7 @@ public:
|
|||
//close(FD[1]);
|
||||
}
|
||||
|
||||
FILE *getInputFile() {
|
||||
return input;
|
||||
}
|
||||
FILE *getInputFile() { return input; }
|
||||
|
||||
void closeInput() {
|
||||
fclose(input);
|
||||
|
@ -141,9 +139,7 @@ void Cloog::pprint(raw_ostream &OS) {
|
|||
}
|
||||
|
||||
/// Create the Cloog AST from this program.
|
||||
struct clast_root *Cloog::getClast() {
|
||||
return (clast_root*)ClastRoot;
|
||||
}
|
||||
struct clast_root *Cloog::getClast() { return (clast_root *)ClastRoot; }
|
||||
|
||||
void Cloog::buildCloogOptions() {
|
||||
Options = cloog_options_malloc(State);
|
||||
|
@ -234,20 +230,13 @@ void ClastVisitor::visit(const clast_stmt *stmt) {
|
|||
visit(stmt->next);
|
||||
}
|
||||
|
||||
void ClastVisitor::visitAssignment(const clast_assignment *stmt) {
|
||||
}
|
||||
void ClastVisitor::visitAssignment(const clast_assignment *stmt) {}
|
||||
|
||||
void ClastVisitor::visitBlock(const clast_block *stmt) {
|
||||
visit(stmt->body);
|
||||
}
|
||||
void ClastVisitor::visitBlock(const clast_block *stmt) { visit(stmt->body); }
|
||||
|
||||
void ClastVisitor::visitFor(const clast_for *stmt) {
|
||||
visit(stmt->body);
|
||||
}
|
||||
void ClastVisitor::visitFor(const clast_for *stmt) { visit(stmt->body); }
|
||||
|
||||
void ClastVisitor::visitGuard(const clast_guard *stmt) {
|
||||
visit(stmt->then);
|
||||
}
|
||||
void ClastVisitor::visitGuard(const clast_guard *stmt) { visit(stmt->then); }
|
||||
|
||||
} // End namespace polly.
|
||||
|
||||
|
@ -310,29 +299,20 @@ void CloogExporter::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
AU.addRequired<CloogInfo>();
|
||||
}
|
||||
|
||||
static RegisterPass<CloogExporter> A("polly-export-cloog",
|
||||
"Polly - Export the Cloog input file"
|
||||
" (Writes a .cloog file for each Scop)"
|
||||
);
|
||||
static RegisterPass<CloogExporter>
|
||||
A("polly-export-cloog", "Polly - Export the Cloog input file"
|
||||
" (Writes a .cloog file for each Scop)");
|
||||
|
||||
llvm::Pass *polly::createCloogExporterPass() {
|
||||
return new CloogExporter();
|
||||
}
|
||||
llvm::Pass *polly::createCloogExporterPass() { return new CloogExporter(); }
|
||||
|
||||
/// Write a .cloog input file
|
||||
void CloogInfo::dump(FILE *F) {
|
||||
C->dump(F);
|
||||
}
|
||||
void CloogInfo::dump(FILE *F) { C->dump(F); }
|
||||
|
||||
/// Print a source code representation of the program.
|
||||
void CloogInfo::pprint(llvm::raw_ostream &OS) {
|
||||
C->pprint(OS);
|
||||
}
|
||||
void CloogInfo::pprint(llvm::raw_ostream &OS) { C->pprint(OS); }
|
||||
|
||||
/// Create the Cloog AST from this program.
|
||||
const struct clast_root *CloogInfo::getClast() {
|
||||
return C->getClast();
|
||||
}
|
||||
const struct clast_root *CloogInfo::getClast() { return C->getClast(); }
|
||||
|
||||
void CloogInfo::releaseMemory() {
|
||||
if (C) {
|
||||
|
|
|
@ -61,25 +61,22 @@ struct isl_set;
|
|||
|
||||
namespace polly {
|
||||
static cl::opt<bool>
|
||||
OpenMP("enable-polly-openmp",
|
||||
cl::desc("Generate OpenMP parallel code"), cl::Hidden,
|
||||
cl::value_desc("OpenMP code generation enabled if true"),
|
||||
OpenMP("enable-polly-openmp", cl::desc("Generate OpenMP parallel code"),
|
||||
cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"),
|
||||
cl::init(false), cl::ZeroOrMore);
|
||||
|
||||
#ifdef GPU_CODEGEN
|
||||
static cl::opt<bool>
|
||||
GPGPU("enable-polly-gpgpu",
|
||||
cl::desc("Generate GPU parallel code"), cl::Hidden,
|
||||
cl::value_desc("GPGPU code generation enabled if true"),
|
||||
cl::init(false), cl::ZeroOrMore);
|
||||
GPGPU("enable-polly-gpgpu", cl::desc("Generate GPU parallel code"), cl::Hidden,
|
||||
cl::value_desc("GPGPU code generation enabled if true"), cl::init(false),
|
||||
cl::ZeroOrMore);
|
||||
|
||||
static cl::opt<std::string>
|
||||
GPUTriple("polly-gpgpu-triple",
|
||||
cl::desc("Target triple for GPU code generation"),
|
||||
static cl::opt<std::string> GPUTriple(
|
||||
"polly-gpgpu-triple", cl::desc("Target triple for GPU code generation"),
|
||||
cl::Hidden, cl::init(""));
|
||||
#endif /* GPU_CODEGEN */
|
||||
|
||||
typedef DenseMap<const char*, Value*> CharMapT;
|
||||
typedef DenseMap<const char *, Value *> CharMapT;
|
||||
|
||||
/// Class to generate LLVM-IR that calculates the value of a clast_expr.
|
||||
class ClastExpCodeGen {
|
||||
|
@ -140,8 +137,7 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
|
|||
switch (e->type) {
|
||||
case clast_bin_mod:
|
||||
return Builder.CreateSRem(LHS, RHS);
|
||||
case clast_bin_fdiv:
|
||||
{
|
||||
case clast_bin_fdiv: {
|
||||
// floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
|
||||
Value *One = ConstantInt::get(Ty, 1);
|
||||
Value *Zero = ConstantInt::get(Ty, 0);
|
||||
|
@ -151,8 +147,7 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
|
|||
Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS);
|
||||
return Builder.CreateSDiv(Dividend, RHS);
|
||||
}
|
||||
case clast_bin_cdiv:
|
||||
{
|
||||
case clast_bin_cdiv: {
|
||||
// ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d
|
||||
Value *One = ConstantInt::get(Ty, 1);
|
||||
Value *Zero = ConstantInt::get(Ty, 0);
|
||||
|
@ -164,30 +159,26 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
|
|||
}
|
||||
case clast_bin_div:
|
||||
return Builder.CreateSDiv(LHS, RHS);
|
||||
};
|
||||
}
|
||||
|
||||
llvm_unreachable("Unknown clast binary expression type");
|
||||
}
|
||||
|
||||
Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) {
|
||||
assert(( r->type == clast_red_min
|
||||
|| r->type == clast_red_max
|
||||
|| r->type == clast_red_sum)
|
||||
&& "Clast reduction type not supported");
|
||||
assert((r->type == clast_red_min || r->type == clast_red_max ||
|
||||
r->type == clast_red_sum) && "Clast reduction type not supported");
|
||||
Value *old = codegen(r->elts[0], Ty);
|
||||
|
||||
for (int i = 1; i < r->n; ++i) {
|
||||
Value *exprValue = codegen(r->elts[i], Ty);
|
||||
|
||||
switch (r->type) {
|
||||
case clast_red_min:
|
||||
{
|
||||
case clast_red_min: {
|
||||
Value *cmp = Builder.CreateICmpSLT(old, exprValue);
|
||||
old = Builder.CreateSelect(cmp, old, exprValue);
|
||||
break;
|
||||
}
|
||||
case clast_red_max:
|
||||
{
|
||||
case clast_red_max: {
|
||||
Value *cmp = Builder.CreateICmpSGT(old, exprValue);
|
||||
old = Builder.CreateSelect(cmp, old, exprValue);
|
||||
break;
|
||||
|
@ -205,7 +196,7 @@ ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap)
|
|||
: Builder(B), IVS(IVMap) {}
|
||||
|
||||
Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) {
|
||||
switch(e->type) {
|
||||
switch (e->type) {
|
||||
case clast_expr_name:
|
||||
return codegen((const clast_name *)e, Ty);
|
||||
case clast_expr_term:
|
||||
|
@ -260,11 +251,11 @@ private:
|
|||
unsigned Dimension, int vectorDim,
|
||||
std::vector<ValueMapT> *VectorVMap = 0);
|
||||
|
||||
void codegenSubstitutions(const clast_stmt *Assignment,
|
||||
ScopStmt *Statement, int vectorDim = 0,
|
||||
void codegenSubstitutions(const clast_stmt *Assignment, ScopStmt *Statement,
|
||||
int vectorDim = 0,
|
||||
std::vector<ValueMapT> *VectorVMap = 0);
|
||||
|
||||
void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL,
|
||||
void codegen(const clast_user_stmt *u, std::vector<Value *> *IVS = NULL,
|
||||
const char *iterator = NULL, isl_set *scatteringDomain = 0);
|
||||
|
||||
void codegen(const clast_block *b);
|
||||
|
@ -276,7 +267,7 @@ private:
|
|||
///
|
||||
/// Create a list of values that has to be stored into the OpenMP subfunction
|
||||
/// structure.
|
||||
SetVector<Value*> getOMPValues(const clast_stmt *Body);
|
||||
SetVector<Value *> getOMPValues(const clast_stmt *Body);
|
||||
|
||||
/// @brief Update ClastVars and ValueMap according to a value map.
|
||||
///
|
||||
|
@ -295,7 +286,7 @@ private:
|
|||
/// Create a list of values that will be set to be parameters of the GPGPU
|
||||
/// subfunction. These parameters represent device memory base addresses
|
||||
/// and the size in bytes.
|
||||
SetVector<Value*> getGPUValues(unsigned &OutputBytes);
|
||||
SetVector<Value *> getGPUValues(unsigned &OutputBytes);
|
||||
|
||||
/// @brief Create a GPU parallel for loop.
|
||||
///
|
||||
|
@ -304,10 +295,9 @@ private:
|
|||
void codegenForGPGPU(const clast_for *F);
|
||||
|
||||
/// @brief Get innermost for loop.
|
||||
const clast_stmt *getScheduleInfo(const clast_for *F,
|
||||
std::vector<int> &NumIters,
|
||||
unsigned &LoopDepth,
|
||||
unsigned &NonPLoopDepth);
|
||||
const clast_stmt *
|
||||
getScheduleInfo(const clast_for *F, std::vector<int> &NumIters,
|
||||
unsigned &LoopDepth, unsigned &NonPLoopDepth);
|
||||
#endif /* GPU_CODEGEN */
|
||||
|
||||
/// @brief Check if a loop is parallel
|
||||
|
@ -379,15 +369,15 @@ void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt,
|
|||
ValueMap[PN] = RHS;
|
||||
}
|
||||
|
||||
void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment,
|
||||
ScopStmt *Statement, int vectorDim,
|
||||
void ClastStmtCodeGen::codegenSubstitutions(
|
||||
const clast_stmt *Assignment, ScopStmt *Statement, int vectorDim,
|
||||
std::vector<ValueMapT> *VectorVMap) {
|
||||
int Dimension = 0;
|
||||
|
||||
while (Assignment) {
|
||||
assert(CLAST_STMT_IS_A(Assignment, stmt_ass) &&
|
||||
"Substitions are expected to be assignments");
|
||||
codegen((const clast_assignment *) Assignment, Statement, Dimension,
|
||||
codegen((const clast_assignment *)Assignment, Statement, Dimension,
|
||||
vectorDim, VectorVMap);
|
||||
Assignment = Assignment->next;
|
||||
Dimension++;
|
||||
|
@ -397,18 +387,19 @@ void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment,
|
|||
// Takes the cloog-specific domain and translates it into a map Statement ->
|
||||
// PartialSchedule, where the PartialSchedule contains all the dimensions that
|
||||
// have been code generated up to this point.
|
||||
static __isl_give isl_map *extractPartialSchedule(ScopStmt *Statement,
|
||||
isl_set *Domain) {
|
||||
static __isl_give isl_map *
|
||||
extractPartialSchedule(ScopStmt *Statement, isl_set *Domain) {
|
||||
isl_map *Schedule = Statement->getScattering();
|
||||
int ScheduledDimensions = isl_set_dim(Domain, isl_dim_set);
|
||||
int UnscheduledDimensions = isl_map_dim(Schedule, isl_dim_out) - ScheduledDimensions;
|
||||
int UnscheduledDimensions =
|
||||
isl_map_dim(Schedule, isl_dim_out) - ScheduledDimensions;
|
||||
|
||||
return isl_map_project_out(Schedule, isl_dim_out, ScheduledDimensions,
|
||||
UnscheduledDimensions);
|
||||
}
|
||||
|
||||
void ClastStmtCodeGen::codegen(const clast_user_stmt *u,
|
||||
std::vector<Value*> *IVS , const char *iterator,
|
||||
std::vector<Value *> *IVS, const char *iterator,
|
||||
isl_set *Domain) {
|
||||
ScopStmt *Statement = (ScopStmt *)u->statement->usr;
|
||||
|
||||
|
@ -427,7 +418,7 @@ void ClastStmtCodeGen::codegen(const clast_user_stmt *u,
|
|||
if (IVS) {
|
||||
assert(u->substitutions && "Substitutions expected!");
|
||||
int i = 0;
|
||||
for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end();
|
||||
for (std::vector<Value *>::iterator II = IVS->begin(), IE = IVS->end();
|
||||
II != IE; ++II) {
|
||||
ClastVars[iterator] = *II;
|
||||
codegenSubstitutions(u->substitutions, Statement, i, &VectorMap);
|
||||
|
@ -473,7 +464,7 @@ void ClastStmtCodeGen::codegenForSequential(const clast_for *f) {
|
|||
class ParameterVisitor : public ClastVisitor {
|
||||
std::set<Value *> Values;
|
||||
public:
|
||||
ParameterVisitor() : ClastVisitor(), Values() { }
|
||||
ParameterVisitor() : ClastVisitor(), Values() {}
|
||||
|
||||
void visitUser(const clast_user_stmt *Stmt) {
|
||||
const ScopStmt *S = static_cast<const ScopStmt *>(Stmt->statement->usr);
|
||||
|
@ -484,7 +475,8 @@ public:
|
|||
++BI) {
|
||||
const Instruction &Inst = *BI;
|
||||
for (Instruction::const_op_iterator II = Inst.op_begin(),
|
||||
IE = Inst.op_end(); II != IE; ++II) {
|
||||
IE = Inst.op_end();
|
||||
II != IE; ++II) {
|
||||
Value *SrcVal = *II;
|
||||
|
||||
if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal))
|
||||
|
@ -503,12 +495,12 @@ public:
|
|||
inline const_iterator end() const { return Values.end(); }
|
||||
};
|
||||
|
||||
SetVector<Value*> ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) {
|
||||
SetVector<Value*> Values;
|
||||
SetVector<Value *> ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) {
|
||||
SetVector<Value *> Values;
|
||||
|
||||
// The clast variables
|
||||
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
|
||||
I != E; I++)
|
||||
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); I != E;
|
||||
I++)
|
||||
Values.insert(I->second);
|
||||
|
||||
// Find the temporaries that are referenced in the clast statements'
|
||||
|
@ -530,16 +522,17 @@ SetVector<Value*> ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) {
|
|||
|
||||
void ClastStmtCodeGen::updateWithValueMap(
|
||||
OMPGenerator::ValueToValueMapTy &VMap) {
|
||||
std::set<Value*> Inserted;
|
||||
std::set<Value *> Inserted;
|
||||
|
||||
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
|
||||
I != E; I++) {
|
||||
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); I != E;
|
||||
I++) {
|
||||
ClastVars[I->first] = VMap[I->second];
|
||||
Inserted.insert(I->second);
|
||||
}
|
||||
|
||||
for (OMPGenerator::ValueToValueMapTy::iterator I = VMap.begin(),
|
||||
E = VMap.end(); I != E; ++I) {
|
||||
E = VMap.end();
|
||||
I != E; ++I) {
|
||||
if (Inserted.count(I->first))
|
||||
continue;
|
||||
|
||||
|
@ -549,11 +542,11 @@ void ClastStmtCodeGen::updateWithValueMap(
|
|||
|
||||
static void clearDomtree(Function *F, DominatorTree &DT) {
|
||||
DomTreeNode *N = DT.getNode(&F->getEntryBlock());
|
||||
std::vector<BasicBlock*> Nodes;
|
||||
for (po_iterator<DomTreeNode*> I = po_begin(N), E = po_end(N); I != E; ++I)
|
||||
std::vector<BasicBlock *> Nodes;
|
||||
for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I)
|
||||
Nodes.push_back(I->getBlock());
|
||||
|
||||
for (std::vector<BasicBlock*>::iterator I = Nodes.begin(), E = Nodes.end();
|
||||
for (std::vector<BasicBlock *>::iterator I = Nodes.begin(), E = Nodes.end();
|
||||
I != E; ++I)
|
||||
DT.eraseNode(*I);
|
||||
}
|
||||
|
@ -562,7 +555,7 @@ void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) {
|
|||
Value *Stride, *LB, *UB, *IV;
|
||||
BasicBlock::iterator LoopBody;
|
||||
IntegerType *IntPtrTy = getIntPtrTy();
|
||||
SetVector<Value*> Values;
|
||||
SetVector<Value *> Values;
|
||||
OMPGenerator::ValueToValueMapTy VMap;
|
||||
OMPGenerator OMPGen(Builder, P);
|
||||
|
||||
|
@ -608,16 +601,17 @@ static unsigned getArraySizeInBytes(const ArrayType *AT) {
|
|||
return Bytes;
|
||||
}
|
||||
|
||||
SetVector<Value*> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
|
||||
SetVector<Value*> Values;
|
||||
SetVector<Value *> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
|
||||
SetVector<Value *> Values;
|
||||
OutputBytes = 0;
|
||||
|
||||
// Record the memory reference base addresses.
|
||||
for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
|
||||
ScopStmt *Stmt = *SI;
|
||||
for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(),
|
||||
E = Stmt->memacc_end(); I != E; ++I) {
|
||||
Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr());
|
||||
for (SmallVector<MemoryAccess *, 8>::iterator I = Stmt->memacc_begin(),
|
||||
E = Stmt->memacc_end();
|
||||
I != E; ++I) {
|
||||
Value *BaseAddr = const_cast<Value *>((*I)->getBaseAddr());
|
||||
Values.insert((BaseAddr));
|
||||
|
||||
// FIXME: we assume that there is one and only one array to be written
|
||||
|
@ -627,7 +621,7 @@ SetVector<Value*> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
|
|||
++NumWrites;
|
||||
assert(NumWrites <= 1 &&
|
||||
"We support at most one array to be written in a SCoP.");
|
||||
if (const PointerType * PT =
|
||||
if (const PointerType *PT =
|
||||
dyn_cast<PointerType>(BaseAddr->getType())) {
|
||||
Type *T = PT->getArrayElementType();
|
||||
const ArrayType *ATy = dyn_cast<ArrayType>(T);
|
||||
|
@ -640,9 +634,8 @@ SetVector<Value*> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
|
|||
return Values;
|
||||
}
|
||||
|
||||
const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F,
|
||||
std::vector<int> &NumIters,
|
||||
unsigned &LoopDepth,
|
||||
const clast_stmt *ClastStmtCodeGen::getScheduleInfo(
|
||||
const clast_for *F, std::vector<int> &NumIters, unsigned &LoopDepth,
|
||||
unsigned &NonPLoopDepth) {
|
||||
clast_stmt *Stmt = (clast_stmt *)F;
|
||||
const clast_for *Result;
|
||||
|
@ -652,7 +645,7 @@ const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F,
|
|||
|
||||
while (Stmt) {
|
||||
if (CLAST_STMT_IS_A(Stmt, stmt_for)) {
|
||||
const clast_for *T = (clast_for *) Stmt;
|
||||
const clast_for *T = (clast_for *)Stmt;
|
||||
if (isParallelFor(T)) {
|
||||
if (!NonParaFlag) {
|
||||
NumIters.push_back(getNumberOfIterations(T));
|
||||
|
@ -672,8 +665,8 @@ const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F,
|
|||
"The loops should be tiled into 4-depth parallel loops and an "
|
||||
"innermost non-parallel one (if exist).");
|
||||
NonPLoopDepth = LoopDepth - NumIters.size();
|
||||
assert(NonPLoopDepth <= 1
|
||||
&& "We support only one innermost non-parallel loop currently.");
|
||||
assert(NonPLoopDepth <= 1 &&
|
||||
"We support only one innermost non-parallel loop currently.");
|
||||
return (const clast_stmt *)Result->body;
|
||||
}
|
||||
|
||||
|
@ -690,8 +683,8 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) {
|
|||
|
||||
// Get original IVS and ScopStmt
|
||||
unsigned TiledLoopDepth, NonPLoopDepth;
|
||||
const clast_stmt *InnerStmt = getScheduleInfo(F, NumIterations,
|
||||
TiledLoopDepth, NonPLoopDepth);
|
||||
const clast_stmt *InnerStmt =
|
||||
getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth);
|
||||
const clast_stmt *TmpStmt;
|
||||
const clast_user_stmt *U;
|
||||
const clast_for *InnerFor;
|
||||
|
@ -700,8 +693,8 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) {
|
|||
TmpStmt = InnerFor->body;
|
||||
} else
|
||||
TmpStmt = InnerStmt;
|
||||
U = (const clast_user_stmt *) TmpStmt;
|
||||
ScopStmt *Statement = (ScopStmt *) U->statement->usr;
|
||||
U = (const clast_user_stmt *)TmpStmt;
|
||||
ScopStmt *Statement = (ScopStmt *)U->statement->usr;
|
||||
for (unsigned i = 0; i < Statement->getNumIterators() - NonPLoopDepth; i++) {
|
||||
const Value *IV = Statement->getInductionVariableForDimension(i);
|
||||
IVS.insert(const_cast<Value *>(IV));
|
||||
|
@ -726,7 +719,7 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) {
|
|||
CmpInst::ICMP_SLE);
|
||||
const Value *OldIV_ = Statement->getInductionVariableForDimension(2);
|
||||
Value *OldIV = const_cast<Value *>(OldIV_);
|
||||
VMap.insert(std::make_pair<Value*, Value*>(OldIV, IV));
|
||||
VMap.insert(std::make_pair<Value *, Value *>(OldIV, IV));
|
||||
}
|
||||
|
||||
updateWithValueMap(VMap);
|
||||
|
@ -787,11 +780,11 @@ void ClastStmtCodeGen::codegenForVector(const clast_for *F) {
|
|||
Stride = Stride.zext(LoopIVType->getBitWidth());
|
||||
Value *StrideValue = ConstantInt::get(LoopIVType, Stride);
|
||||
|
||||
std::vector<Value*> IVS(VectorWidth);
|
||||
std::vector<Value *> IVS(VectorWidth);
|
||||
IVS[0] = LB;
|
||||
|
||||
for (int i = 1; i < VectorWidth; i++)
|
||||
IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv");
|
||||
IVS[i] = Builder.CreateAdd(IVS[i - 1], StrideValue, "p_vector_iv");
|
||||
|
||||
isl_set *Domain = isl_set_from_cloog_domain(F->domain);
|
||||
|
||||
|
@ -872,8 +865,8 @@ void ClastStmtCodeGen::codegen(const clast_guard *g) {
|
|||
Function *F = Builder.GetInsertBlock()->getParent();
|
||||
LLVMContext &Context = F->getContext();
|
||||
|
||||
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
|
||||
Builder.GetInsertPoint(), P);
|
||||
BasicBlock *CondBB =
|
||||
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
|
||||
CondBB->setName("polly.cond");
|
||||
BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P);
|
||||
MergeBB->setName("polly.merge");
|
||||
|
@ -946,7 +939,7 @@ void ClastStmtCodeGen::codegen(const clast_root *r) {
|
|||
|
||||
parallelCodeGeneration = false;
|
||||
|
||||
const clast_stmt *stmt = (const clast_stmt*) r;
|
||||
const clast_stmt *stmt = (const clast_stmt *)r;
|
||||
if (stmt->next)
|
||||
codegen(stmt->next);
|
||||
}
|
||||
|
@ -964,7 +957,6 @@ public:
|
|||
|
||||
CodeGeneration() : ScopPass(ID) {}
|
||||
|
||||
|
||||
bool runOnScop(Scop &S) {
|
||||
ParallelLoops.clear();
|
||||
|
||||
|
@ -986,7 +978,8 @@ public:
|
|||
|
||||
virtual void printScop(raw_ostream &OS) const {
|
||||
for (std::vector<std::string>::const_iterator PI = ParallelLoops.begin(),
|
||||
PE = ParallelLoops.end(); PI != PE; ++PI)
|
||||
PE = ParallelLoops.end();
|
||||
PI != PE; ++PI)
|
||||
OS << "Parallel loop with iterator '" << *PI << "' generated\n";
|
||||
}
|
||||
|
||||
|
|
|
@ -78,10 +78,9 @@ struct AstBuildUserInfo {
|
|||
};
|
||||
|
||||
// Print a loop annotated with OpenMP or vector pragmas.
|
||||
static __isl_give isl_printer *
|
||||
printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
|
||||
__isl_take isl_ast_print_options *PrintOptions,
|
||||
IslAstUser *Info) {
|
||||
static __isl_give isl_printer *printParallelFor(
|
||||
__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
|
||||
__isl_take isl_ast_print_options *PrintOptions, IslAstUser *Info) {
|
||||
if (Info) {
|
||||
if (Info->IsInnermostParallel) {
|
||||
Printer = isl_printer_start_line(Printer);
|
||||
|
@ -106,7 +105,7 @@ printFor(__isl_take isl_printer *Printer,
|
|||
if (!Id)
|
||||
return isl_ast_node_for_print(Node, Printer, PrintOptions);
|
||||
|
||||
struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
|
||||
struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Id);
|
||||
Printer = printParallelFor(Node, Printer, PrintOptions, Info);
|
||||
isl_id_free(Id);
|
||||
return Printer;
|
||||
|
@ -115,7 +114,7 @@ printFor(__isl_take isl_printer *Printer,
|
|||
// Allocate an AstNodeInfo structure and initialize it with default values.
|
||||
static struct IslAstUser *allocateIslAstUser() {
|
||||
struct IslAstUser *NodeInfo;
|
||||
NodeInfo = (struct IslAstUser *) malloc(sizeof(struct IslAstUser));
|
||||
NodeInfo = (struct IslAstUser *)malloc(sizeof(struct IslAstUser));
|
||||
NodeInfo->PMA = 0;
|
||||
NodeInfo->Context = 0;
|
||||
NodeInfo->IsOutermostParallel = 0;
|
||||
|
@ -125,7 +124,7 @@ static struct IslAstUser *allocateIslAstUser() {
|
|||
|
||||
// Free the AstNodeInfo structure.
|
||||
static void freeIslAstUser(void *Ptr) {
|
||||
struct IslAstUser *UserStruct = (struct IslAstUser *) Ptr;
|
||||
struct IslAstUser *UserStruct = (struct IslAstUser *)Ptr;
|
||||
isl_ast_build_free(UserStruct->Context);
|
||||
isl_pw_multi_aff_free(UserStruct->PMA);
|
||||
free(UserStruct);
|
||||
|
@ -202,9 +201,9 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
|
|||
//
|
||||
// - Detection of openmp parallel loops
|
||||
//
|
||||
static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
|
||||
void *User) {
|
||||
struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
|
||||
static __isl_give isl_id *
|
||||
astBuildBeforeFor(__isl_keep isl_ast_build *Build, void *User) {
|
||||
struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *)User;
|
||||
struct IslAstUser *NodeInfo = allocateIslAstUser();
|
||||
isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
|
||||
Id = isl_id_set_free_user(Id, freeIslAstUser);
|
||||
|
@ -262,13 +261,13 @@ static bool containsLoops(__isl_take isl_ast_node *Node) {
|
|||
// that is marked as openmp parallel.
|
||||
//
|
||||
static __isl_give isl_ast_node *
|
||||
astBuildAfterFor(__isl_take isl_ast_node *Node,
|
||||
__isl_keep isl_ast_build *Build, void *User) {
|
||||
astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
|
||||
void *User) {
|
||||
isl_id *Id = isl_ast_node_get_annotation(Node);
|
||||
if (!Id)
|
||||
return Node;
|
||||
struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
|
||||
struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
|
||||
struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Id);
|
||||
struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *)User;
|
||||
|
||||
if (Info) {
|
||||
if (Info->IsOutermostParallel)
|
||||
|
@ -285,14 +284,13 @@ astBuildAfterFor(__isl_take isl_ast_node *Node,
|
|||
}
|
||||
|
||||
static __isl_give isl_ast_node *
|
||||
AtEachDomain(__isl_take isl_ast_node *Node,
|
||||
__isl_keep isl_ast_build *Context, void *User)
|
||||
{
|
||||
AtEachDomain(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Context,
|
||||
void *User) {
|
||||
struct IslAstUser *Info = NULL;
|
||||
isl_id *Id = isl_ast_node_get_annotation(Node);
|
||||
|
||||
if (Id)
|
||||
Info = (struct IslAstUser *) isl_id_get_user(Id);
|
||||
Info = (struct IslAstUser *)isl_id_get_user(Id);
|
||||
|
||||
if (!Info) {
|
||||
// Allocate annotations once: parallel for detection might have already
|
||||
|
@ -360,16 +358,14 @@ __isl_give isl_union_map *IslAst::getSchedule() {
|
|||
isl_map *StmtSchedule = Stmt->getScattering();
|
||||
|
||||
StmtSchedule = isl_map_intersect_domain(StmtSchedule, Stmt->getDomain());
|
||||
Schedule = isl_union_map_union(Schedule,
|
||||
isl_union_map_from_map(StmtSchedule));
|
||||
Schedule =
|
||||
isl_union_map_union(Schedule, isl_union_map_from_map(StmtSchedule));
|
||||
}
|
||||
|
||||
return Schedule;
|
||||
}
|
||||
|
||||
IslAst::~IslAst() {
|
||||
isl_ast_node_free(Root);
|
||||
}
|
||||
IslAst::~IslAst() { isl_ast_node_free(Root); }
|
||||
|
||||
/// Print a C like representation of the program.
|
||||
void IslAst::pprint(llvm::raw_ostream &OS) {
|
||||
|
@ -390,13 +386,9 @@ void IslAst::pprint(llvm::raw_ostream &OS) {
|
|||
}
|
||||
|
||||
/// Create the isl_ast from this program.
|
||||
__isl_give isl_ast_node *IslAst::getAst() {
|
||||
return isl_ast_node_copy(Root);
|
||||
}
|
||||
__isl_give isl_ast_node *IslAst::getAst() { return isl_ast_node_copy(Root); }
|
||||
|
||||
void IslAstInfo::pprint(llvm::raw_ostream &OS) {
|
||||
Ast->pprint(OS);
|
||||
}
|
||||
void IslAstInfo::pprint(llvm::raw_ostream &OS) { Ast->pprint(OS); }
|
||||
|
||||
void IslAstInfo::releaseMemory() {
|
||||
if (Ast) {
|
||||
|
@ -418,9 +410,7 @@ bool IslAstInfo::runOnScop(Scop &Scop) {
|
|||
return false;
|
||||
}
|
||||
|
||||
__isl_give isl_ast_node *IslAstInfo::getAst() {
|
||||
return Ast->getAst();
|
||||
}
|
||||
__isl_give isl_ast_node *IslAstInfo::getAst() { return Ast->getAst(); }
|
||||
|
||||
void IslAstInfo::printScop(raw_ostream &OS) const {
|
||||
Function *F = S->getRegion().getEntry()->getParent();
|
||||
|
|
|
@ -93,8 +93,8 @@ Function *RuntimeDebugBuilder::getPrintF() {
|
|||
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
|
||||
Builder.getInt8PtrTy(), true);
|
||||
FunctionType *Ty =
|
||||
FunctionType::get(Builder.getInt32Ty(), Builder.getInt8PtrTy(), true);
|
||||
F = Function::Create(Ty, Linkage, Name, M);
|
||||
}
|
||||
|
||||
|
@ -108,8 +108,8 @@ void RuntimeDebugBuilder::createFlush() {
|
|||
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
|
||||
Builder.getInt8PtrTy(), false);
|
||||
FunctionType *Ty =
|
||||
FunctionType::get(Builder.getInt32Ty(), Builder.getInt8PtrTy(), false);
|
||||
F = Function::Create(Ty, Linkage, Name, M);
|
||||
}
|
||||
|
||||
|
@ -148,7 +148,7 @@ public:
|
|||
|
||||
private:
|
||||
IRBuilder<> &Builder;
|
||||
std::map<isl_id *, Value*> &IDToValue;
|
||||
std::map<isl_id *, Value *> &IDToValue;
|
||||
|
||||
Value *createOp(__isl_take isl_ast_expr *Expr);
|
||||
Value *createOpUnary(__isl_take isl_ast_expr *Expr);
|
||||
|
@ -213,14 +213,12 @@ Value *IslExprBuilder::createOpNAry(__isl_take isl_ast_expr *Expr) {
|
|||
default:
|
||||
llvm_unreachable("This is no n-ary isl ast expression");
|
||||
|
||||
case isl_ast_op_max:
|
||||
{
|
||||
case isl_ast_op_max: {
|
||||
Value *Cmp = Builder.CreateICmpSGT(V, OpV);
|
||||
V = Builder.CreateSelect(Cmp, V, OpV);
|
||||
continue;
|
||||
}
|
||||
case isl_ast_op_min:
|
||||
{
|
||||
case isl_ast_op_min: {
|
||||
Value *Cmp = Builder.CreateICmpSLT(V, OpV);
|
||||
V = Builder.CreateSelect(Cmp, V, OpV);
|
||||
continue;
|
||||
|
@ -299,8 +297,7 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) {
|
|||
case isl_ast_op_pdiv_q: // Dividend is non-negative
|
||||
Res = Builder.CreateSDiv(LHS, RHS);
|
||||
break;
|
||||
case isl_ast_op_fdiv_q: // Round towards -infty
|
||||
{
|
||||
case isl_ast_op_fdiv_q: { // Round towards -infty
|
||||
// TODO: Review code and check that this calculation does not yield
|
||||
// incorrect overflow in some bordercases.
|
||||
//
|
||||
|
@ -556,7 +553,7 @@ private:
|
|||
// This maps an isl_id* to the Value* it has in the generated program. For now
|
||||
// on, the only isl_ids that are stored here are the newly calculated loop
|
||||
// ivs.
|
||||
std::map<isl_id *, Value*> IDToValue;
|
||||
std::map<isl_id *, Value *> IDToValue;
|
||||
|
||||
// Extract the upper bound of this loop
|
||||
//
|
||||
|
@ -577,8 +574,8 @@ private:
|
|||
// of loop iterations.
|
||||
//
|
||||
// 3. With the existing code, upper bounds have been easier to implement.
|
||||
__isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For,
|
||||
CmpInst::Predicate &Predicate);
|
||||
__isl_give isl_ast_expr *
|
||||
getUpperBound(__isl_keep isl_ast_node *For, CmpInst::Predicate &Predicate);
|
||||
|
||||
unsigned getNumberOfIterations(__isl_keep isl_ast_node *For);
|
||||
|
||||
|
@ -586,17 +583,16 @@ private:
|
|||
void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
|
||||
void createForSequential(__isl_take isl_ast_node *For);
|
||||
void createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
|
||||
__isl_take isl_ast_build *Context,
|
||||
ScopStmt *Stmt, ValueMapT &VMap);
|
||||
void createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
|
||||
__isl_take isl_ast_build *Context,
|
||||
ScopStmt *Stmt, VectorValueMapT &VMap,
|
||||
std::vector<Value*> &IVS,
|
||||
__isl_take isl_ast_build *Context, ScopStmt *Stmt,
|
||||
ValueMapT &VMap);
|
||||
void createSubstitutionsVector(
|
||||
__isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context,
|
||||
ScopStmt *Stmt, VectorValueMapT &VMap, std::vector<Value *> &IVS,
|
||||
__isl_take isl_id *IteratorID);
|
||||
void createIf(__isl_take isl_ast_node *If);
|
||||
void createUserVector(__isl_take isl_ast_node *User,
|
||||
std::vector<Value*> &IVS, __isl_take isl_id *IteratorID,
|
||||
__isl_take isl_union_map *Schedule);
|
||||
void createUserVector(
|
||||
__isl_take isl_ast_node *User, std::vector<Value *> &IVS,
|
||||
__isl_take isl_id *IteratorID, __isl_take isl_union_map *Schedule);
|
||||
void createUser(__isl_take isl_ast_node *User);
|
||||
void createBlock(__isl_take isl_ast_node *Block);
|
||||
};
|
||||
|
@ -656,7 +652,7 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
|
|||
if (!Annotation)
|
||||
return -1;
|
||||
|
||||
struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
|
||||
struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Annotation);
|
||||
if (!Info) {
|
||||
isl_id_free(Annotation);
|
||||
return -1;
|
||||
|
@ -671,18 +667,17 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
|
|||
return NumberOfIterations + 1;
|
||||
}
|
||||
|
||||
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
|
||||
std::vector<Value*> &IVS,
|
||||
__isl_take isl_id *IteratorID,
|
||||
__isl_take isl_union_map *Schedule) {
|
||||
void IslNodeBuilder::createUserVector(
|
||||
__isl_take isl_ast_node *User, std::vector<Value *> &IVS,
|
||||
__isl_take isl_id *IteratorID, __isl_take isl_union_map *Schedule) {
|
||||
isl_id *Annotation = isl_ast_node_get_annotation(User);
|
||||
assert(Annotation && "Vector user statement is not annotated");
|
||||
|
||||
struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
|
||||
struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Annotation);
|
||||
assert(Info && "Vector user statement annotation does not contain info");
|
||||
|
||||
isl_id *Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
|
||||
ScopStmt *Stmt = (ScopStmt *) isl_id_get_user(Id);
|
||||
ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
|
||||
VectorValueMapT VectorMap(IVS.size());
|
||||
|
||||
isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain());
|
||||
|
@ -726,16 +721,16 @@ void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
|
|||
if (MaxType != ValueInc->getType())
|
||||
ValueInc = Builder.CreateSExt(ValueInc, MaxType);
|
||||
|
||||
std::vector<Value*> IVS(VectorWidth);
|
||||
std::vector<Value *> IVS(VectorWidth);
|
||||
IVS[0] = ValueLB;
|
||||
|
||||
for (int i = 1; i < VectorWidth; i++)
|
||||
IVS[i] = Builder.CreateAdd(IVS[i-1], ValueInc, "p_vector_iv");
|
||||
IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
|
||||
|
||||
isl_id *Annotation = isl_ast_node_get_annotation(For);
|
||||
assert(Annotation && "For statement is not annotated");
|
||||
|
||||
struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
|
||||
struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Annotation);
|
||||
assert(Info && "For statement annotation does not contain info");
|
||||
|
||||
isl_union_map *Schedule = isl_ast_build_get_schedule(Info->Context);
|
||||
|
@ -819,8 +814,8 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
|
|||
// executed at least once, which will enable a lot of loop invariant
|
||||
// code motion.
|
||||
|
||||
IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock,
|
||||
Predicate);
|
||||
IV =
|
||||
createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock, Predicate);
|
||||
IDToValue[IteratorID] = IV;
|
||||
|
||||
create(Body);
|
||||
|
@ -853,8 +848,8 @@ void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
|
|||
Function *F = Builder.GetInsertBlock()->getParent();
|
||||
LLVMContext &Context = F->getContext();
|
||||
|
||||
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
|
||||
Builder.GetInsertPoint(), P);
|
||||
BasicBlock *CondBB =
|
||||
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
|
||||
CondBB->setName("polly.cond");
|
||||
BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P);
|
||||
MergeBB->setName("polly.merge");
|
||||
|
@ -914,13 +909,14 @@ void IslNodeBuilder::createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
|
|||
isl_ast_build_free(Context);
|
||||
}
|
||||
|
||||
void IslNodeBuilder::createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
|
||||
__isl_take isl_ast_build *Context, ScopStmt *Stmt, VectorValueMapT &VMap,
|
||||
std::vector<Value*> &IVS, __isl_take isl_id *IteratorID) {
|
||||
void IslNodeBuilder::createSubstitutionsVector(
|
||||
__isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context,
|
||||
ScopStmt *Stmt, VectorValueMapT &VMap, std::vector<Value *> &IVS,
|
||||
__isl_take isl_id *IteratorID) {
|
||||
int i = 0;
|
||||
|
||||
Value *OldValue = IDToValue[IteratorID];
|
||||
for (std::vector<Value*>::iterator II = IVS.begin(), IE = IVS.end();
|
||||
for (std::vector<Value *>::iterator II = IVS.begin(), IE = IVS.end();
|
||||
II != IE; ++II) {
|
||||
IDToValue[IteratorID] = *II;
|
||||
createSubstitutions(isl_pw_multi_aff_copy(PMA), isl_ast_build_copy(Context),
|
||||
|
@ -943,11 +939,11 @@ void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
|
|||
Annotation = isl_ast_node_get_annotation(User);
|
||||
assert(Annotation && "Scalar user statement is not annotated");
|
||||
|
||||
Info = (struct IslAstUser *) isl_id_get_user(Annotation);
|
||||
Info = (struct IslAstUser *)isl_id_get_user(Annotation);
|
||||
assert(Info && "Scalar user statement annotation does not contain info");
|
||||
|
||||
Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
|
||||
Stmt = (ScopStmt *) isl_id_get_user(Id);
|
||||
Stmt = (ScopStmt *)isl_id_get_user(Id);
|
||||
|
||||
createSubstitutions(isl_pw_multi_aff_copy(Info->PMA),
|
||||
isl_ast_build_copy(Info->Context), Stmt, VMap);
|
||||
|
@ -1000,7 +996,7 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
|
|||
Instruction *InsertLocation;
|
||||
|
||||
Id = isl_set_get_dim_id(Context, isl_dim_param, i);
|
||||
Scev = (const SCEV*) isl_id_get_user(Id);
|
||||
Scev = (const SCEV *)isl_id_get_user(Id);
|
||||
T = dyn_cast<IntegerType>(Scev->getType());
|
||||
InsertLocation = --(Builder.GetInsertBlock()->end());
|
||||
Value *V = Rewriter.expandCodeFor(Scev, T, InsertLocation);
|
||||
|
@ -1033,8 +1029,7 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
virtual void printScop(raw_ostream &OS) const {
|
||||
}
|
||||
virtual void printScop(raw_ostream &OS) const {}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<DominatorTree>();
|
||||
|
|
|
@ -73,12 +73,9 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
|
|||
return IV;
|
||||
}
|
||||
|
||||
void OMPGenerator::createCallParallelLoopStart(Value *SubFunction,
|
||||
Value *SubfunctionParam,
|
||||
Value *NumberOfThreads,
|
||||
Value *LowerBound,
|
||||
Value *UpperBound,
|
||||
Value *Stride) {
|
||||
void OMPGenerator::createCallParallelLoopStart(
|
||||
Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
|
||||
Value *LowerBound, Value *UpperBound, Value *Stride) {
|
||||
Module *M = getModule();
|
||||
const char *Name = "GOMP_parallel_loop_runtime_start";
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -88,35 +85,23 @@ void OMPGenerator::createCallParallelLoopStart(Value *SubFunction,
|
|||
Type *LongTy = getIntPtrTy();
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
|
||||
Type *Params[] = {
|
||||
PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(),
|
||||
Builder.getInt8PtrTy(),
|
||||
false)),
|
||||
Builder.getInt8PtrTy(),
|
||||
Builder.getInt32Ty(),
|
||||
LongTy,
|
||||
LongTy,
|
||||
LongTy,
|
||||
};
|
||||
Type *Params[] = { PointerType::getUnqual(FunctionType::get(
|
||||
Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
|
||||
Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
|
||||
LongTy, LongTy, };
|
||||
|
||||
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
|
||||
F = Function::Create(Ty, Linkage, Name, M);
|
||||
}
|
||||
|
||||
Value *Args[] = {
|
||||
SubFunction,
|
||||
SubfunctionParam,
|
||||
NumberOfThreads,
|
||||
LowerBound,
|
||||
UpperBound,
|
||||
Stride,
|
||||
};
|
||||
Value *Args[] = { SubFunction, SubfunctionParam, NumberOfThreads, LowerBound,
|
||||
UpperBound, Stride, };
|
||||
|
||||
Builder.CreateCall(F, Args);
|
||||
}
|
||||
|
||||
Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
|
||||
Value *UpperBoundPtr) {
|
||||
Value *
|
||||
OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr) {
|
||||
Module *M = getModule();
|
||||
const char *Name = "GOMP_loop_runtime_next";
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -126,23 +111,17 @@ Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
|
|||
Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
|
||||
Type *Params[] = {
|
||||
LongPtrTy,
|
||||
LongPtrTy,
|
||||
};
|
||||
Type *Params[] = { LongPtrTy, LongPtrTy, };
|
||||
|
||||
FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
|
||||
F = Function::Create(Ty, Linkage, Name, M);
|
||||
}
|
||||
|
||||
Value *Args[] = {
|
||||
LowerBoundPtr,
|
||||
UpperBoundPtr,
|
||||
};
|
||||
Value *Args[] = { LowerBoundPtr, UpperBoundPtr, };
|
||||
|
||||
Value *Return = Builder.CreateCall(F, Args);
|
||||
Return = Builder.CreateICmpNE(Return, Builder.CreateZExt(Builder.getFalse(),
|
||||
Return->getType()));
|
||||
Return = Builder.CreateICmpNE(
|
||||
Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
|
||||
return Return;
|
||||
}
|
||||
|
||||
|
@ -189,7 +168,7 @@ Module *OMPGenerator::getModule() {
|
|||
Function *OMPGenerator::createSubfunctionDefinition() {
|
||||
Module *M = getModule();
|
||||
Function *F = Builder.GetInsertBlock()->getParent();
|
||||
std::vector<Type*> Arguments(1, Builder.getInt8PtrTy());
|
||||
std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
|
||||
FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
|
||||
Function *FN = Function::Create(FT, Function::InternalLinkage,
|
||||
F->getName() + ".omp_subfn", M);
|
||||
|
@ -202,8 +181,8 @@ Function *OMPGenerator::createSubfunctionDefinition() {
|
|||
return FN;
|
||||
}
|
||||
|
||||
Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value*> &Values) {
|
||||
std::vector<Type*> Members;
|
||||
Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
|
||||
std::vector<Type *> Members;
|
||||
|
||||
for (unsigned i = 0; i < Values.size(); i++)
|
||||
Members.push_back(Values[i]->getType());
|
||||
|
@ -219,20 +198,18 @@ Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value*> &Values) {
|
|||
return Struct;
|
||||
}
|
||||
|
||||
void OMPGenerator::extractValuesFromStruct(SetVector<Value*> OldValues,
|
||||
Value *Struct,
|
||||
ValueToValueMapTy &Map) {
|
||||
void OMPGenerator::extractValuesFromStruct(
|
||||
SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
|
||||
for (unsigned i = 0; i < OldValues.size(); i++) {
|
||||
Value *Address = Builder.CreateStructGEP(Struct, i);
|
||||
Value *NewValue = Builder.CreateLoad(Address);
|
||||
Map.insert(std::make_pair<Value*, Value*>(OldValues[i], NewValue));
|
||||
Map.insert(std::make_pair<Value *, Value *>(OldValues[i], NewValue));
|
||||
}
|
||||
}
|
||||
|
||||
Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
|
||||
SetVector<Value*> Data,
|
||||
ValueToValueMapTy &Map,
|
||||
Function **SubFunction) {
|
||||
Value *OMPGenerator::createSubfunction(
|
||||
Value *Stride, Value *StructData, SetVector<Value *> Data,
|
||||
ValueToValueMapTy &Map, Function **SubFunction) {
|
||||
Function *FN = createSubfunctionDefinition();
|
||||
|
||||
BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
|
||||
|
@ -303,10 +280,9 @@ Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
|
|||
return IV;
|
||||
}
|
||||
|
||||
Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
|
||||
Value *Stride,
|
||||
SetVector<Value*> &Values,
|
||||
ValueToValueMapTy &Map,
|
||||
Value *OMPGenerator::createParallelLoop(
|
||||
Value *LowerBound, Value *UpperBound, Value *Stride,
|
||||
SetVector<Value *> &Values, ValueToValueMapTy &Map,
|
||||
BasicBlock::iterator *LoopBody) {
|
||||
Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
|
||||
Function *SubFunction;
|
||||
|
@ -319,15 +295,15 @@ Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
|
|||
Builder.SetInsertPoint(PrevInsertPoint);
|
||||
|
||||
// Create call for GOMP_parallel_loop_runtime_start.
|
||||
SubfunctionParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
|
||||
"omp_data");
|
||||
SubfunctionParam =
|
||||
Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
|
||||
|
||||
NumberOfThreads = Builder.getInt32(0);
|
||||
|
||||
// Add one as the upper bound provided by openmp is a < comparison
|
||||
// whereas the codegenForSequential function creates a <= comparison.
|
||||
UpperBound = Builder.CreateAdd(UpperBound,
|
||||
ConstantInt::get(getIntPtrTy(), 1));
|
||||
UpperBound =
|
||||
Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
|
||||
|
||||
createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
|
||||
LowerBound, UpperBound, Stride);
|
||||
|
|
|
@ -34,8 +34,8 @@ using namespace llvm;
|
|||
using namespace polly;
|
||||
|
||||
PTXGenerator::PTXGenerator(IRBuilder<> &Builder, Pass *P,
|
||||
const std::string &Triple):
|
||||
Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1),
|
||||
const std::string &Triple)
|
||||
: Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1),
|
||||
BlockWidth(1), BlockHeight(1), OutputBytes(0) {
|
||||
|
||||
InitializeGPUDataTypes();
|
||||
|
@ -50,7 +50,7 @@ Function *PTXGenerator::createSubfunctionDefinition(int NumArgs) {
|
|||
|
||||
Module *M = getModule();
|
||||
Function *F = Builder.GetInsertBlock()->getParent();
|
||||
std::vector<Type*> Arguments;
|
||||
std::vector<Type *> Arguments;
|
||||
for (int i = 0; i < NumArgs; i++)
|
||||
Arguments.push_back(Builder.getInt8PtrTy());
|
||||
FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
|
||||
|
@ -67,10 +67,9 @@ Function *PTXGenerator::createSubfunctionDefinition(int NumArgs) {
|
|||
return FN;
|
||||
}
|
||||
|
||||
void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
|
||||
SetVector<Value*> &OriginalIVS,
|
||||
PTXGenerator::ValueToValueMapTy &VMap,
|
||||
Function **SubFunction) {
|
||||
void PTXGenerator::createSubfunction(
|
||||
SetVector<Value *> &UsedValues, SetVector<Value *> &OriginalIVS,
|
||||
PTXGenerator::ValueToValueMapTy &VMap, Function **SubFunction) {
|
||||
Function *FN = createSubfunctionDefinition(UsedValues.size());
|
||||
Module *M = getModule();
|
||||
LLVMContext &Context = FN->getContext();
|
||||
|
@ -98,7 +97,7 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
|
|||
Value *BaseAddr = UsedValues[j];
|
||||
Type *ArrayTy = BaseAddr->getType();
|
||||
Value *Param = Builder.CreateBitCast(AI, ArrayTy);
|
||||
VMap.insert(std::make_pair<Value*, Value*>(BaseAddr, Param));
|
||||
VMap.insert(std::make_pair<Value *, Value *>(BaseAddr, Param));
|
||||
AI++;
|
||||
}
|
||||
|
||||
|
@ -142,8 +141,8 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
|
|||
Value *BlockID, *ThreadID;
|
||||
switch (NumDims) {
|
||||
case 1: {
|
||||
Value *BlockSize = Builder.CreateMul(BlockWidth, BlockHeight,
|
||||
"p_gpu_blocksize");
|
||||
Value *BlockSize =
|
||||
Builder.CreateMul(BlockWidth, BlockHeight, "p_gpu_blocksize");
|
||||
BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i");
|
||||
BlockID = Builder.CreateAdd(BlockID, BIDx);
|
||||
BlockID = Builder.CreateMul(BlockID, BlockSize);
|
||||
|
@ -183,11 +182,11 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
|
|||
return;
|
||||
}
|
||||
|
||||
assert(OriginalIVS.size() == Substitutions.size()
|
||||
&& "The size of IVS should be equal to the size of substitutions.");
|
||||
assert(OriginalIVS.size() == Substitutions.size() &&
|
||||
"The size of IVS should be equal to the size of substitutions.");
|
||||
for (unsigned i = 0; i < OriginalIVS.size(); ++i) {
|
||||
VMap.insert(std::make_pair<Value*, Value*>(OriginalIVS[i],
|
||||
Substitutions[i]));
|
||||
VMap.insert(
|
||||
std::make_pair<Value *, Value *>(OriginalIVS[i], Substitutions[i]));
|
||||
}
|
||||
|
||||
Builder.CreateBr(ExitBB);
|
||||
|
@ -202,10 +201,9 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
|
|||
*SubFunction = FN;
|
||||
}
|
||||
|
||||
void PTXGenerator::startGeneration(SetVector<Value*> &UsedValues,
|
||||
SetVector<Value*> &OriginalIVS,
|
||||
ValueToValueMapTy &VMap,
|
||||
BasicBlock::iterator *LoopBody) {
|
||||
void PTXGenerator::startGeneration(
|
||||
SetVector<Value *> &UsedValues, SetVector<Value *> &OriginalIVS,
|
||||
ValueToValueMapTy &VMap, BasicBlock::iterator *LoopBody) {
|
||||
Function *SubFunction;
|
||||
BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
|
||||
createSubfunction(UsedValues, OriginalIVS, VMap, &SubFunction);
|
||||
|
@ -213,9 +211,7 @@ void PTXGenerator::startGeneration(SetVector<Value*> &UsedValues,
|
|||
Builder.SetInsertPoint(PrevInsertPoint);
|
||||
}
|
||||
|
||||
IntegerType *PTXGenerator::getInt64Type() {
|
||||
return Builder.getInt64Ty();
|
||||
}
|
||||
IntegerType *PTXGenerator::getInt64Type() { return Builder.getInt64Ty(); }
|
||||
|
||||
PointerType *PTXGenerator::getI8PtrType() {
|
||||
return PointerType::getUnqual(Builder.getInt8Ty());
|
||||
|
@ -260,7 +256,7 @@ void PTXGenerator::InitializeGPUDataTypes() {
|
|||
ModuleTy = StructType::create(Context, "struct.PollyGPUModuleT");
|
||||
KernelTy = StructType::create(Context, "struct.PollyGPUFunctionT");
|
||||
DeviceTy = StructType::create(Context, "struct.PollyGPUDeviceT");
|
||||
DevDataTy = StructType::create(Context,"struct.PollyGPUDevicePtrT");
|
||||
DevDataTy = StructType::create(Context, "struct.PollyGPUDevicePtrT");
|
||||
EventTy = StructType::create(Context, "struct.PollyGPUEventT");
|
||||
}
|
||||
|
||||
|
@ -272,7 +268,7 @@ void PTXGenerator::createCallInitDevice(Value *Context, Value *Device) {
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(PointerType::getUnqual(getGPUContextPtrType()));
|
||||
Args.push_back(PointerType::getUnqual(getGPUDevicePtrType()));
|
||||
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
|
||||
|
@ -290,7 +286,7 @@ void PTXGenerator::createCallGetPTXModule(Value *Buffer, Value *Module) {
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getI8PtrType());
|
||||
Args.push_back(PointerType::getUnqual(getGPUModulePtrType()));
|
||||
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
|
||||
|
@ -309,7 +305,7 @@ void PTXGenerator::createCallGetPTXKernelEntry(Value *Entry, Value *Module,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getI8PtrType());
|
||||
Args.push_back(getGPUModulePtrType());
|
||||
Args.push_back(PointerType::getUnqual(getGPUFunctionPtrType()));
|
||||
|
@ -320,9 +316,8 @@ void PTXGenerator::createCallGetPTXKernelEntry(Value *Entry, Value *Module,
|
|||
Builder.CreateCall3(F, Entry, Module, Kernel);
|
||||
}
|
||||
|
||||
void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData,
|
||||
Value *DeviceData,
|
||||
Value *Size) {
|
||||
void PTXGenerator::createCallAllocateMemoryForHostAndDevice(
|
||||
Value *HostData, Value *DeviceData, Value *Size) {
|
||||
const char *Name = "polly_allocateMemoryForHostAndDevice";
|
||||
Module *M = getModule();
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -330,7 +325,7 @@ void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getPtrI8PtrType());
|
||||
Args.push_back(PointerType::getUnqual(getPtrGPUDevicePtrType()));
|
||||
Args.push_back(getInt64Type());
|
||||
|
@ -341,9 +336,8 @@ void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData,
|
|||
Builder.CreateCall3(F, HostData, DeviceData, Size);
|
||||
}
|
||||
|
||||
void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData,
|
||||
Value *HostData,
|
||||
Value *Size) {
|
||||
void PTXGenerator::createCallCopyFromHostToDevice(
|
||||
Value *DeviceData, Value *HostData, Value *Size) {
|
||||
const char *Name = "polly_copyFromHostToDevice";
|
||||
Module *M = getModule();
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -351,7 +345,7 @@ void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getPtrGPUDevicePtrType());
|
||||
Args.push_back(getI8PtrType());
|
||||
Args.push_back(getInt64Type());
|
||||
|
@ -362,9 +356,8 @@ void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData,
|
|||
Builder.CreateCall3(F, DeviceData, HostData, Size);
|
||||
}
|
||||
|
||||
void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData,
|
||||
Value *DeviceData,
|
||||
Value *Size) {
|
||||
void PTXGenerator::createCallCopyFromDeviceToHost(
|
||||
Value *HostData, Value *DeviceData, Value *Size) {
|
||||
const char *Name = "polly_copyFromDeviceToHost";
|
||||
Module *M = getModule();
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -372,7 +365,7 @@ void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getI8PtrType());
|
||||
Args.push_back(getPtrGPUDevicePtrType());
|
||||
Args.push_back(getInt64Type());
|
||||
|
@ -383,10 +376,8 @@ void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData,
|
|||
Builder.CreateCall3(F, HostData, DeviceData, Size);
|
||||
}
|
||||
|
||||
void PTXGenerator::createCallSetKernelParameters(Value *Kernel,
|
||||
Value *BlockWidth,
|
||||
Value *BlockHeight,
|
||||
Value *DeviceData) {
|
||||
void PTXGenerator::createCallSetKernelParameters(
|
||||
Value *Kernel, Value *BlockWidth, Value *BlockHeight, Value *DeviceData) {
|
||||
const char *Name = "polly_setKernelParameters";
|
||||
Module *M = getModule();
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -394,7 +385,7 @@ void PTXGenerator::createCallSetKernelParameters(Value *Kernel,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getGPUFunctionPtrType());
|
||||
Args.push_back(getInt64Type());
|
||||
Args.push_back(getInt64Type());
|
||||
|
@ -415,7 +406,7 @@ void PTXGenerator::createCallLaunchKernel(Value *Kernel, Value *GridWidth,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getGPUFunctionPtrType());
|
||||
Args.push_back(getInt64Type());
|
||||
Args.push_back(getInt64Type());
|
||||
|
@ -435,7 +426,7 @@ void PTXGenerator::createCallStartTimerByCudaEvent(Value *StartEvent,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(PointerType::getUnqual(getGPUEventPtrType()));
|
||||
Args.push_back(PointerType::getUnqual(getGPUEventPtrType()));
|
||||
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
|
||||
|
@ -445,9 +436,8 @@ void PTXGenerator::createCallStartTimerByCudaEvent(Value *StartEvent,
|
|||
Builder.CreateCall2(F, StartEvent, StopEvent);
|
||||
}
|
||||
|
||||
void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent,
|
||||
Value *StopEvent,
|
||||
Value *Timer) {
|
||||
void PTXGenerator::createCallStopTimerByCudaEvent(
|
||||
Value *StartEvent, Value *StopEvent, Value *Timer) {
|
||||
const char *Name = "polly_stopTimerByCudaEvent";
|
||||
Module *M = getModule();
|
||||
Function *F = M->getFunction(Name);
|
||||
|
@ -455,7 +445,7 @@ void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getGPUEventPtrType());
|
||||
Args.push_back(getGPUEventPtrType());
|
||||
Args.push_back(getFloatPtrType());
|
||||
|
@ -466,10 +456,8 @@ void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent,
|
|||
Builder.CreateCall3(F, StartEvent, StopEvent, Timer);
|
||||
}
|
||||
|
||||
void PTXGenerator::createCallCleanupGPGPUResources(Value *HostData,
|
||||
Value *DeviceData,
|
||||
Value *Module,
|
||||
Value *Context,
|
||||
void PTXGenerator::createCallCleanupGPGPUResources(
|
||||
Value *HostData, Value *DeviceData, Value *Module, Value *Context,
|
||||
Value *Kernel) {
|
||||
const char *Name = "polly_cleanupGPGPUResources";
|
||||
llvm::Module *M = getModule();
|
||||
|
@ -478,7 +466,7 @@ void PTXGenerator::createCallCleanupGPGPUResources(Value *HostData,
|
|||
// If F is not available, declare it.
|
||||
if (!F) {
|
||||
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
||||
std::vector<Type*> Args;
|
||||
std::vector<Type *> Args;
|
||||
Args.push_back(getI8PtrType());
|
||||
Args.push_back(getPtrGPUDevicePtrType());
|
||||
Args.push_back(getGPUModulePtrType());
|
||||
|
@ -516,11 +504,11 @@ Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) {
|
|||
std::string LLVMKernelStr;
|
||||
raw_string_ostream NameROS(LLVMKernelStr);
|
||||
formatted_raw_ostream FOS(NameROS);
|
||||
FOS << "target triple = \"" << GPUTriple <<"\"\n";
|
||||
FOS << "target triple = \"" << GPUTriple << "\"\n";
|
||||
SubFunction->print(FOS);
|
||||
|
||||
// Insert ptx intrinsics into the kernel string.
|
||||
for (Module::iterator I = M->begin(), E = M->end(); I != E; ) {
|
||||
for (Module::iterator I = M->begin(), E = M->end(); I != E;) {
|
||||
Function *F = I++;
|
||||
// Function must be a prototype and unused.
|
||||
if (F->isDeclaration() && F->isIntrinsic()) {
|
||||
|
@ -541,13 +529,12 @@ Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) {
|
|||
}
|
||||
}
|
||||
|
||||
Value *LLVMKernel = Builder.CreateGlobalStringPtr(LLVMKernelStr,
|
||||
"llvm_kernel");
|
||||
Value *LLVMKernel =
|
||||
Builder.CreateGlobalStringPtr(LLVMKernelStr, "llvm_kernel");
|
||||
Value *MCPU = Builder.CreateGlobalStringPtr("sm_10", "mcpu");
|
||||
Value *Features = Builder.CreateGlobalStringPtr("", "cpu_features");
|
||||
|
||||
Function *GetDeviceKernel = Intrinsic::getDeclaration(M,
|
||||
Intrinsic::codegen);
|
||||
Function *GetDeviceKernel = Intrinsic::getDeclaration(M, Intrinsic::codegen);
|
||||
|
||||
return Builder.CreateCall3(GetDeviceKernel, LLVMKernel, MCPU, Features);
|
||||
}
|
||||
|
@ -596,22 +583,22 @@ void PTXGenerator::eraseUnusedFunctions(Function *SubFunction) {
|
|||
|
||||
void PTXGenerator::finishGeneration(Function *F) {
|
||||
// Define data used by the GPURuntime library.
|
||||
AllocaInst *PtrCUContext = Builder.CreateAlloca(getGPUContextPtrType(), 0,
|
||||
"phcontext");
|
||||
AllocaInst *PtrCUDevice = Builder.CreateAlloca(getGPUDevicePtrType(), 0,
|
||||
"phdevice");
|
||||
AllocaInst *PtrCUModule = Builder.CreateAlloca(getGPUModulePtrType(), 0,
|
||||
"phmodule");
|
||||
AllocaInst *PtrCUKernel = Builder.CreateAlloca(getGPUFunctionPtrType(), 0,
|
||||
"phkernel");
|
||||
AllocaInst *PtrCUStartEvent = Builder.CreateAlloca(getGPUEventPtrType(), 0,
|
||||
"pstart_timer");
|
||||
AllocaInst *PtrCUStopEvent = Builder.CreateAlloca(getGPUEventPtrType(), 0,
|
||||
"pstop_timer");
|
||||
AllocaInst *PtrDevData = Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0,
|
||||
"pdevice_data");
|
||||
AllocaInst *PtrHostData = Builder.CreateAlloca(getI8PtrType(), 0,
|
||||
"phost_data");
|
||||
AllocaInst *PtrCUContext =
|
||||
Builder.CreateAlloca(getGPUContextPtrType(), 0, "phcontext");
|
||||
AllocaInst *PtrCUDevice =
|
||||
Builder.CreateAlloca(getGPUDevicePtrType(), 0, "phdevice");
|
||||
AllocaInst *PtrCUModule =
|
||||
Builder.CreateAlloca(getGPUModulePtrType(), 0, "phmodule");
|
||||
AllocaInst *PtrCUKernel =
|
||||
Builder.CreateAlloca(getGPUFunctionPtrType(), 0, "phkernel");
|
||||
AllocaInst *PtrCUStartEvent =
|
||||
Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstart_timer");
|
||||
AllocaInst *PtrCUStopEvent =
|
||||
Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstop_timer");
|
||||
AllocaInst *PtrDevData =
|
||||
Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0, "pdevice_data");
|
||||
AllocaInst *PtrHostData =
|
||||
Builder.CreateAlloca(getI8PtrType(), 0, "phost_data");
|
||||
Type *FloatTy = llvm::Type::getFloatTy(getModule()->getContext());
|
||||
AllocaInst *PtrElapsedTimes = Builder.CreateAlloca(FloatTy, 0, "ptimer");
|
||||
|
||||
|
@ -648,13 +635,11 @@ void PTXGenerator::finishGeneration(Function *F) {
|
|||
// Record the end time.
|
||||
LoadInst *CUStartEvent = Builder.CreateLoad(PtrCUStartEvent, "start_timer");
|
||||
LoadInst *CUStopEvent = Builder.CreateLoad(PtrCUStopEvent, "stop_timer");
|
||||
createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent,
|
||||
PtrElapsedTimes);
|
||||
createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent, PtrElapsedTimes);
|
||||
|
||||
// Cleanup all the resources used.
|
||||
LoadInst *CUContext = Builder.CreateLoad(PtrCUContext, "cucontext");
|
||||
createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext,
|
||||
CUKernel);
|
||||
createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext, CUKernel);
|
||||
|
||||
// Erase the ptx kernel and device subfunctions and ptx intrinsics from
|
||||
// current module.
|
||||
|
|
Loading…
Reference in New Issue