CodeGen: clang-format goodness

The changed files are not yet clang-format clean, but we are getting close.

llvm-svn: 174403
This commit is contained in:
Tobias Grosser 2013-02-05 18:01:29 +00:00
parent dc69f6fbca
commit c14582f276
7 changed files with 415 additions and 516 deletions

View File

@ -31,15 +31,13 @@ using namespace llvm;
using namespace polly;
static cl::opt<bool>
Aligned("enable-polly-aligned",
cl::desc("Assumed aligned memory accesses."), cl::Hidden,
cl::value_desc("OpenMP code generation enabled if true"),
Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."),
cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"),
cl::init(false), cl::ZeroOrMore);
static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
cl::desc("Use SCEV based code generation."), cl::Hidden,
cl::init(false), cl::ZeroOrMore);
SCEVCodegen("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
cl::Hidden, cl::init(false), cl::ZeroOrMore);
/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
@ -114,13 +112,10 @@ public:
return Expr;
}
return SCEVVisitor<SCEVRewriter, const SCEV *>::visit(Expr);
}
const SCEV *visitConstant(const SCEVConstant *Constant) {
return Constant;
}
const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; }
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
@ -369,8 +364,8 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
if (GlobalMap.count(Old)) {
Value *New = GlobalMap[Old];
if (Old->getType()->getScalarSizeInBits()
< New->getType()->getScalarSizeInBits())
if (Old->getType()->getScalarSizeInBits() <
New->getType()->getScalarSizeInBits())
New = Builder.CreateTruncOrBitCast(New, Old->getType());
return New;
@ -383,9 +378,8 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
if (SCEVCodegen && SE.isSCEVable(Old->getType()))
if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old)))
if (!isa<SCEVCouldNotCompute>(Scev)) {
const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
*Statement.getParent(), SE,
GlobalMap, BBMap);
const SCEV *NewScev = SCEVRewriter::rewrite(
Scev, *Statement.getParent(), SE, GlobalMap, BBMap);
SCEVExpander Expander(SE, "polly");
Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
Builder.GetInsertPoint());
@ -414,13 +408,14 @@ void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
// Replace old operands with the new ones.
for (Instruction::const_op_iterator OI = Inst->op_begin(),
OE = Inst->op_end(); OI != OE; ++OI) {
OE = Inst->op_end();
OI != OE; ++OI) {
Value *OldOperand = *OI;
Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
if (!NewOperand) {
assert(!isa<StoreInst>(NewInst)
&& "Store instructions are always needed!");
assert(!isa<StoreInst>(NewInst) &&
"Store instructions are always needed!");
delete NewInst;
return;
}
@ -436,8 +431,8 @@ void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
}
std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
__isl_keep isl_map *AccessRelation, Value *BaseAddress,
ValueMapT &BBMap, ValueMapT &GlobalMap) {
__isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
ValueMapT &GlobalMap) {
assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
"Only single dimensional access functions supported");
@ -464,19 +459,17 @@ std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
}
Value *BlockGenerator::getNewAccessOperand(
__isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
ValueMapT &BBMap, ValueMapT &GlobalMap) {
std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
BaseAddress,
BBMap, GlobalMap);
Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
"p_newarrayidx_");
__isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
ValueMapT &GlobalMap) {
std::vector<Value *> IndexArray =
getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
Value *NewOperand =
Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
return NewOperand;
}
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
const Value *Pointer,
ValueMapT &BBMap,
Value *BlockGenerator::generateLocationAccessed(
const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
ValueMapT &GlobalMap) {
MemoryAccess &Access = Statement.getAccessFor(Inst);
isl_map *CurrentAccessRelation = Access.getAccessRelation();
@ -491,8 +484,8 @@ Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
} else {
Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
BBMap, GlobalMap);
NewPointer =
getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
}
isl_map_free(CurrentAccessRelation);
@ -500,23 +493,21 @@ Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
return NewPointer;
}
Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
ValueMapT &BBMap,
ValueMapT &GlobalMap) {
Value *BlockGenerator::generateScalarLoad(
const LoadInst *Load, ValueMapT &BBMap, ValueMapT &GlobalMap) {
const Value *Pointer = Load->getPointerOperand();
const Instruction *Inst = dyn_cast<Instruction>(Load);
Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
Value *ScalarLoad = Builder.CreateLoad(NewPointer,
Load->getName() + "_p_scalar_");
Value *ScalarLoad =
Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
return ScalarLoad;
}
Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
ValueMapT &BBMap,
ValueMapT &GlobalMap) {
Value *BlockGenerator::generateScalarStore(
const StoreInst *Store, ValueMapT &BBMap, ValueMapT &GlobalMap) {
const Value *Pointer = Store->getPointerOperand();
Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
GlobalMap);
Value *NewPointer =
generateLocationAccessed(Store, Pointer, BBMap, GlobalMap);
Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
return Builder.CreateStore(ValueOperand, NewPointer);
@ -547,8 +538,8 @@ void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
BasicBlock *BB = Statement.getBasicBlock();
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
Builder.GetInsertPoint(), P);
BasicBlock *CopyBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
CopyBB->setName("polly.stmt." + BB->getName());
Builder.SetInsertPoint(CopyBB->begin());
@ -559,19 +550,16 @@ void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
copyInstruction(II, BBMap, GlobalMap);
}
VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
VectorValueMapT &GlobalMaps,
ScopStmt &Stmt,
__isl_keep isl_map *Schedule,
Pass *P)
VectorBlockGenerator::VectorBlockGenerator(
IRBuilder<> &B, VectorValueMapT &GlobalMaps, ScopStmt &Stmt,
__isl_keep isl_map *Schedule, Pass *P)
: BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
assert(GlobalMaps.size() > 1 && "Only one vector lane found");
assert(Schedule && "No statement domain provided");
}
Value *VectorBlockGenerator::getVectorValue(const Value *Old,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
Value *VectorBlockGenerator::getVectorValue(
const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
if (VectorMap.count(Old))
return VectorMap[Old];
@ -580,10 +568,8 @@ Value *VectorBlockGenerator::getVectorValue(const Value *Old,
Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
for (int Lane = 0; Lane < Width; Lane++)
Vector = Builder.CreateInsertElement(Vector,
getNewValue(Old,
ScalarMaps[Lane],
GlobalMaps[Lane]),
Vector = Builder.CreateInsertElement(
Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane]),
Builder.getInt32(Lane));
VectorMap[Old] = Vector;
@ -606,10 +592,10 @@ Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
const Value *Pointer = Load->getPointerOperand();
Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
"vector_ptr");
LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
Load->getName() + "_p_vec_full");
Value *VectorPtr =
Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
LoadInst *VecLoad =
Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
if (!Aligned)
VecLoad->setAlignment(8);
@ -623,25 +609,22 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
Load->getName() + "_p_vec_p");
LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
Load->getName() + "_p_splat_one");
LoadInst *ScalarLoad =
Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
if (!Aligned)
ScalarLoad->setAlignment(8);
Constant *SplatVector =
Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
getVectorWidth()));
Constant *SplatVector = Constant::getNullValue(
VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
SplatVector,
Load->getName()
+ "_p_splat");
Value *VectorLoad = Builder.CreateShuffleVector(
ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
return VectorLoad;
}
Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
VectorValueMapT &ScalarMaps) {
Value *VectorBlockGenerator::generateUnknownStrideLoad(
const LoadInst *Load, VectorValueMapT &ScalarMaps) {
int VectorWidth = getVectorWidth();
const Value *Pointer = Load->getPointerOperand();
VectorType *VectorType = VectorType::get(
@ -651,24 +634,22 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
for (int i = 0; i < VectorWidth; i++) {
Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
Value *ScalarLoad = Builder.CreateLoad(NewPointer,
Load->getName() + "_p_scalar_");
Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
Builder.getInt32(i),
Load->getName() + "_p_vec_");
Value *ScalarLoad =
Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
Vector = Builder.CreateInsertElement(
Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
}
return Vector;
}
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
void VectorBlockGenerator::generateLoad(
const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
!VectorType::isValidElementType(Load->getType())) {
for (int i = 0; i < getVectorWidth(); i++)
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
GlobalMaps[i]);
ScalarMaps[i][Load] =
generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i]);
return;
}
@ -689,8 +670,8 @@ void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
int VectorWidth = getVectorWidth();
Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
ScalarMaps);
Value *NewOperand =
getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps);
assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
@ -714,23 +695,22 @@ void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
VectorMap[Inst] = NewInst;
}
void VectorBlockGenerator::copyStore(const StoreInst *Store,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
void VectorBlockGenerator::copyStore(
const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
int VectorWidth = getVectorWidth();
MemoryAccess &Access = Statement.getAccessFor(Store);
const Value *Pointer = Store->getPointerOperand();
Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
ScalarMaps);
Value *Vector =
getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps);
if (Access.isStrideOne(isl_map_copy(Schedule))) {
Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
"vector_ptr");
Value *VectorPtr =
Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
if (!Aligned)
@ -747,7 +727,8 @@ void VectorBlockGenerator::copyStore(const StoreInst *Store,
bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
ValueMapT &VectorMap) {
for (Instruction::const_op_iterator OI = Inst->op_begin(),
OE = Inst->op_end(); OI != OE; ++OI)
OE = Inst->op_end();
OI != OE; ++OI)
if (VectorMap.count(*OI))
return true;
return false;
@ -760,7 +741,8 @@ bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
int VectorWidth = getVectorWidth();
for (Instruction::const_op_iterator OI = Inst->op_begin(),
OE = Inst->op_end(); OI != OE; ++OI) {
OE = Inst->op_end();
OI != OE; ++OI) {
ValueMapT::iterator VecOp = VectorMap.find(*OI);
if (VecOp == VectorMap.end())
@ -810,9 +792,7 @@ void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
VectorMap[Inst] = Vector;
}
int VectorBlockGenerator::getVectorWidth() {
return GlobalMaps.size();
}
int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
ValueMapT &VectorMap,
@ -855,8 +835,8 @@ void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
void VectorBlockGenerator::copyBB() {
BasicBlock *BB = Statement.getBasicBlock();
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
Builder.GetInsertPoint(), P);
BasicBlock *CopyBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
CopyBB->setName("polly.stmt." + BB->getName());
Builder.SetInsertPoint(CopyBB->begin());
@ -877,7 +857,7 @@ void VectorBlockGenerator::copyBB() {
VectorValueMapT ScalarBlockMap(getVectorWidth());
ValueMapT VectorBlockMap;
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II)
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
++II)
copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}

View File

@ -97,9 +97,7 @@ public:
//close(FD[1]);
}
FILE *getInputFile() {
return input;
}
FILE *getInputFile() { return input; }
void closeInput() {
fclose(input);
@ -141,9 +139,7 @@ void Cloog::pprint(raw_ostream &OS) {
}
/// Create the Cloog AST from this program.
struct clast_root *Cloog::getClast() {
return (clast_root*)ClastRoot;
}
struct clast_root *Cloog::getClast() { return (clast_root *)ClastRoot; }
void Cloog::buildCloogOptions() {
Options = cloog_options_malloc(State);
@ -234,20 +230,13 @@ void ClastVisitor::visit(const clast_stmt *stmt) {
visit(stmt->next);
}
void ClastVisitor::visitAssignment(const clast_assignment *stmt) {
}
void ClastVisitor::visitAssignment(const clast_assignment *stmt) {}
void ClastVisitor::visitBlock(const clast_block *stmt) {
visit(stmt->body);
}
void ClastVisitor::visitBlock(const clast_block *stmt) { visit(stmt->body); }
void ClastVisitor::visitFor(const clast_for *stmt) {
visit(stmt->body);
}
void ClastVisitor::visitFor(const clast_for *stmt) { visit(stmt->body); }
void ClastVisitor::visitGuard(const clast_guard *stmt) {
visit(stmt->then);
}
void ClastVisitor::visitGuard(const clast_guard *stmt) { visit(stmt->then); }
} // End namespace polly.
@ -310,29 +299,20 @@ void CloogExporter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CloogInfo>();
}
static RegisterPass<CloogExporter> A("polly-export-cloog",
"Polly - Export the Cloog input file"
" (Writes a .cloog file for each Scop)"
);
static RegisterPass<CloogExporter>
A("polly-export-cloog", "Polly - Export the Cloog input file"
" (Writes a .cloog file for each Scop)");
llvm::Pass *polly::createCloogExporterPass() {
return new CloogExporter();
}
llvm::Pass *polly::createCloogExporterPass() { return new CloogExporter(); }
/// Write a .cloog input file
void CloogInfo::dump(FILE *F) {
C->dump(F);
}
void CloogInfo::dump(FILE *F) { C->dump(F); }
/// Print a source code representation of the program.
void CloogInfo::pprint(llvm::raw_ostream &OS) {
C->pprint(OS);
}
void CloogInfo::pprint(llvm::raw_ostream &OS) { C->pprint(OS); }
/// Create the Cloog AST from this program.
const struct clast_root *CloogInfo::getClast() {
return C->getClast();
}
const struct clast_root *CloogInfo::getClast() { return C->getClast(); }
void CloogInfo::releaseMemory() {
if (C) {

View File

@ -61,21 +61,18 @@ struct isl_set;
namespace polly {
static cl::opt<bool>
OpenMP("enable-polly-openmp",
cl::desc("Generate OpenMP parallel code"), cl::Hidden,
cl::value_desc("OpenMP code generation enabled if true"),
OpenMP("enable-polly-openmp", cl::desc("Generate OpenMP parallel code"),
cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"),
cl::init(false), cl::ZeroOrMore);
#ifdef GPU_CODEGEN
static cl::opt<bool>
GPGPU("enable-polly-gpgpu",
cl::desc("Generate GPU parallel code"), cl::Hidden,
cl::value_desc("GPGPU code generation enabled if true"),
cl::init(false), cl::ZeroOrMore);
GPGPU("enable-polly-gpgpu", cl::desc("Generate GPU parallel code"), cl::Hidden,
cl::value_desc("GPGPU code generation enabled if true"), cl::init(false),
cl::ZeroOrMore);
static cl::opt<std::string>
GPUTriple("polly-gpgpu-triple",
cl::desc("Target triple for GPU code generation"),
static cl::opt<std::string> GPUTriple(
"polly-gpgpu-triple", cl::desc("Target triple for GPU code generation"),
cl::Hidden, cl::init(""));
#endif /* GPU_CODEGEN */
@ -140,8 +137,7 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
switch (e->type) {
case clast_bin_mod:
return Builder.CreateSRem(LHS, RHS);
case clast_bin_fdiv:
{
case clast_bin_fdiv: {
// floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
Value *One = ConstantInt::get(Ty, 1);
Value *Zero = ConstantInt::get(Ty, 0);
@ -151,8 +147,7 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS);
return Builder.CreateSDiv(Dividend, RHS);
}
case clast_bin_cdiv:
{
case clast_bin_cdiv: {
// ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d
Value *One = ConstantInt::get(Ty, 1);
Value *Zero = ConstantInt::get(Ty, 0);
@ -164,30 +159,26 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
}
case clast_bin_div:
return Builder.CreateSDiv(LHS, RHS);
};
}
llvm_unreachable("Unknown clast binary expression type");
}
Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) {
assert(( r->type == clast_red_min
|| r->type == clast_red_max
|| r->type == clast_red_sum)
&& "Clast reduction type not supported");
assert((r->type == clast_red_min || r->type == clast_red_max ||
r->type == clast_red_sum) && "Clast reduction type not supported");
Value *old = codegen(r->elts[0], Ty);
for (int i = 1; i < r->n; ++i) {
Value *exprValue = codegen(r->elts[i], Ty);
switch (r->type) {
case clast_red_min:
{
case clast_red_min: {
Value *cmp = Builder.CreateICmpSLT(old, exprValue);
old = Builder.CreateSelect(cmp, old, exprValue);
break;
}
case clast_red_max:
{
case clast_red_max: {
Value *cmp = Builder.CreateICmpSGT(old, exprValue);
old = Builder.CreateSelect(cmp, old, exprValue);
break;
@ -260,8 +251,8 @@ private:
unsigned Dimension, int vectorDim,
std::vector<ValueMapT> *VectorVMap = 0);
void codegenSubstitutions(const clast_stmt *Assignment,
ScopStmt *Statement, int vectorDim = 0,
void codegenSubstitutions(const clast_stmt *Assignment, ScopStmt *Statement,
int vectorDim = 0,
std::vector<ValueMapT> *VectorVMap = 0);
void codegen(const clast_user_stmt *u, std::vector<Value *> *IVS = NULL,
@ -304,10 +295,9 @@ private:
void codegenForGPGPU(const clast_for *F);
/// @brief Get innermost for loop.
const clast_stmt *getScheduleInfo(const clast_for *F,
std::vector<int> &NumIters,
unsigned &LoopDepth,
unsigned &NonPLoopDepth);
const clast_stmt *
getScheduleInfo(const clast_for *F, std::vector<int> &NumIters,
unsigned &LoopDepth, unsigned &NonPLoopDepth);
#endif /* GPU_CODEGEN */
/// @brief Check if a loop is parallel
@ -379,8 +369,8 @@ void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt,
ValueMap[PN] = RHS;
}
void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment,
ScopStmt *Statement, int vectorDim,
void ClastStmtCodeGen::codegenSubstitutions(
const clast_stmt *Assignment, ScopStmt *Statement, int vectorDim,
std::vector<ValueMapT> *VectorVMap) {
int Dimension = 0;
@ -397,11 +387,12 @@ void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment,
// Takes the Cloog-specific domain and translates it into a map from the
// Statement to its PartialSchedule, where the PartialSchedule contains all the
// dimensions that have been code-generated up to this point.
static __isl_give isl_map *extractPartialSchedule(ScopStmt *Statement,
isl_set *Domain) {
static __isl_give isl_map *
extractPartialSchedule(ScopStmt *Statement, isl_set *Domain) {
isl_map *Schedule = Statement->getScattering();
int ScheduledDimensions = isl_set_dim(Domain, isl_dim_set);
int UnscheduledDimensions = isl_map_dim(Schedule, isl_dim_out) - ScheduledDimensions;
int UnscheduledDimensions =
isl_map_dim(Schedule, isl_dim_out) - ScheduledDimensions;
return isl_map_project_out(Schedule, isl_dim_out, ScheduledDimensions,
UnscheduledDimensions);
@ -484,7 +475,8 @@ public:
++BI) {
const Instruction &Inst = *BI;
for (Instruction::const_op_iterator II = Inst.op_begin(),
IE = Inst.op_end(); II != IE; ++II) {
IE = Inst.op_end();
II != IE; ++II) {
Value *SrcVal = *II;
if (Instruction *OpInst = dyn_cast<Instruction>(SrcVal))
@ -507,8 +499,8 @@ SetVector<Value*> ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) {
SetVector<Value *> Values;
// The clast variables
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
I != E; I++)
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); I != E;
I++)
Values.insert(I->second);
// Find the temporaries that are referenced in the clast statements'
@ -532,14 +524,15 @@ void ClastStmtCodeGen::updateWithValueMap(
OMPGenerator::ValueToValueMapTy &VMap) {
std::set<Value *> Inserted;
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
I != E; I++) {
for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); I != E;
I++) {
ClastVars[I->first] = VMap[I->second];
Inserted.insert(I->second);
}
for (OMPGenerator::ValueToValueMapTy::iterator I = VMap.begin(),
E = VMap.end(); I != E; ++I) {
E = VMap.end();
I != E; ++I) {
if (Inserted.count(I->first))
continue;
@ -616,7 +609,8 @@ SetVector<Value*> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
ScopStmt *Stmt = *SI;
for (SmallVector<MemoryAccess *, 8>::iterator I = Stmt->memacc_begin(),
E = Stmt->memacc_end(); I != E; ++I) {
E = Stmt->memacc_end();
I != E; ++I) {
Value *BaseAddr = const_cast<Value *>((*I)->getBaseAddr());
Values.insert((BaseAddr));
@ -640,9 +634,8 @@ SetVector<Value*> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
return Values;
}
const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F,
std::vector<int> &NumIters,
unsigned &LoopDepth,
const clast_stmt *ClastStmtCodeGen::getScheduleInfo(
const clast_for *F, std::vector<int> &NumIters, unsigned &LoopDepth,
unsigned &NonPLoopDepth) {
clast_stmt *Stmt = (clast_stmt *)F;
const clast_for *Result;
@ -672,8 +665,8 @@ const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F,
"The loops should be tiled into 4-depth parallel loops and an "
"innermost non-parallel one (if exist).");
NonPLoopDepth = LoopDepth - NumIters.size();
assert(NonPLoopDepth <= 1
&& "We support only one innermost non-parallel loop currently.");
assert(NonPLoopDepth <= 1 &&
"We support only one innermost non-parallel loop currently.");
return (const clast_stmt *)Result->body;
}
@ -690,8 +683,8 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) {
// Get original IVS and ScopStmt
unsigned TiledLoopDepth, NonPLoopDepth;
const clast_stmt *InnerStmt = getScheduleInfo(F, NumIterations,
TiledLoopDepth, NonPLoopDepth);
const clast_stmt *InnerStmt =
getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth);
const clast_stmt *TmpStmt;
const clast_user_stmt *U;
const clast_for *InnerFor;
@ -872,8 +865,8 @@ void ClastStmtCodeGen::codegen(const clast_guard *g) {
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
Builder.GetInsertPoint(), P);
BasicBlock *CondBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
CondBB->setName("polly.cond");
BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P);
MergeBB->setName("polly.merge");
@ -964,7 +957,6 @@ public:
CodeGeneration() : ScopPass(ID) {}
bool runOnScop(Scop &S) {
ParallelLoops.clear();
@ -986,7 +978,8 @@ public:
virtual void printScop(raw_ostream &OS) const {
for (std::vector<std::string>::const_iterator PI = ParallelLoops.begin(),
PE = ParallelLoops.end(); PI != PE; ++PI)
PE = ParallelLoops.end();
PI != PE; ++PI)
OS << "Parallel loop with iterator '" << *PI << "' generated\n";
}

View File

@ -78,10 +78,9 @@ struct AstBuildUserInfo {
};
// Print a loop annotated with OpenMP or vector pragmas.
static __isl_give isl_printer *
printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
__isl_take isl_ast_print_options *PrintOptions,
IslAstUser *Info) {
static __isl_give isl_printer *printParallelFor(
__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
__isl_take isl_ast_print_options *PrintOptions, IslAstUser *Info) {
if (Info) {
if (Info->IsInnermostParallel) {
Printer = isl_printer_start_line(Printer);
@ -202,8 +201,8 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
//
// - Detection of openmp parallel loops
//
static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
void *User) {
static __isl_give isl_id *
astBuildBeforeFor(__isl_keep isl_ast_build *Build, void *User) {
struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *)User;
struct IslAstUser *NodeInfo = allocateIslAstUser();
isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
@ -262,8 +261,8 @@ static bool containsLoops(__isl_take isl_ast_node *Node) {
// that is marked as openmp parallel.
//
static __isl_give isl_ast_node *
astBuildAfterFor(__isl_take isl_ast_node *Node,
__isl_keep isl_ast_build *Build, void *User) {
astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
void *User) {
isl_id *Id = isl_ast_node_get_annotation(Node);
if (!Id)
return Node;
@ -285,9 +284,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node,
}
static __isl_give isl_ast_node *
AtEachDomain(__isl_take isl_ast_node *Node,
__isl_keep isl_ast_build *Context, void *User)
{
AtEachDomain(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Context,
void *User) {
struct IslAstUser *Info = NULL;
isl_id *Id = isl_ast_node_get_annotation(Node);
@ -360,16 +358,14 @@ __isl_give isl_union_map *IslAst::getSchedule() {
isl_map *StmtSchedule = Stmt->getScattering();
StmtSchedule = isl_map_intersect_domain(StmtSchedule, Stmt->getDomain());
Schedule = isl_union_map_union(Schedule,
isl_union_map_from_map(StmtSchedule));
Schedule =
isl_union_map_union(Schedule, isl_union_map_from_map(StmtSchedule));
}
return Schedule;
}
IslAst::~IslAst() {
isl_ast_node_free(Root);
}
IslAst::~IslAst() { isl_ast_node_free(Root); }
/// Print a C like representation of the program.
void IslAst::pprint(llvm::raw_ostream &OS) {
@ -390,13 +386,9 @@ void IslAst::pprint(llvm::raw_ostream &OS) {
}
/// Create the isl_ast from this program.
__isl_give isl_ast_node *IslAst::getAst() {
return isl_ast_node_copy(Root);
}
__isl_give isl_ast_node *IslAst::getAst() { return isl_ast_node_copy(Root); }
void IslAstInfo::pprint(llvm::raw_ostream &OS) {
Ast->pprint(OS);
}
void IslAstInfo::pprint(llvm::raw_ostream &OS) { Ast->pprint(OS); }
void IslAstInfo::releaseMemory() {
if (Ast) {
@ -418,9 +410,7 @@ bool IslAstInfo::runOnScop(Scop &Scop) {
return false;
}
__isl_give isl_ast_node *IslAstInfo::getAst() {
return Ast->getAst();
}
__isl_give isl_ast_node *IslAstInfo::getAst() { return Ast->getAst(); }
void IslAstInfo::printScop(raw_ostream &OS) const {
Function *F = S->getRegion().getEntry()->getParent();

View File

@ -93,8 +93,8 @@ Function *RuntimeDebugBuilder::getPrintF() {
if (!F) {
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
Builder.getInt8PtrTy(), true);
FunctionType *Ty =
FunctionType::get(Builder.getInt32Ty(), Builder.getInt8PtrTy(), true);
F = Function::Create(Ty, Linkage, Name, M);
}
@ -108,8 +108,8 @@ void RuntimeDebugBuilder::createFlush() {
if (!F) {
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
Builder.getInt8PtrTy(), false);
FunctionType *Ty =
FunctionType::get(Builder.getInt32Ty(), Builder.getInt8PtrTy(), false);
F = Function::Create(Ty, Linkage, Name, M);
}
@ -213,14 +213,12 @@ Value *IslExprBuilder::createOpNAry(__isl_take isl_ast_expr *Expr) {
default:
llvm_unreachable("This is no n-ary isl ast expression");
case isl_ast_op_max:
{
case isl_ast_op_max: {
Value *Cmp = Builder.CreateICmpSGT(V, OpV);
V = Builder.CreateSelect(Cmp, V, OpV);
continue;
}
case isl_ast_op_min:
{
case isl_ast_op_min: {
Value *Cmp = Builder.CreateICmpSLT(V, OpV);
V = Builder.CreateSelect(Cmp, V, OpV);
continue;
@ -299,8 +297,7 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) {
case isl_ast_op_pdiv_q: // Dividend is non-negative
Res = Builder.CreateSDiv(LHS, RHS);
break;
case isl_ast_op_fdiv_q: // Round towards -infty
{
case isl_ast_op_fdiv_q: { // Round towards -infty
// TODO: Review code and check that this calculation does not yield
// incorrect overflow in some border cases.
//
@ -577,8 +574,8 @@ private:
// of loop iterations.
//
// 3. With the existing code, upper bounds have been easier to implement.
__isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For,
CmpInst::Predicate &Predicate);
__isl_give isl_ast_expr *
getUpperBound(__isl_keep isl_ast_node *For, CmpInst::Predicate &Predicate);
unsigned getNumberOfIterations(__isl_keep isl_ast_node *For);
@ -586,17 +583,16 @@ private:
void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
void createForSequential(__isl_take isl_ast_node *For);
void createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
__isl_take isl_ast_build *Context,
ScopStmt *Stmt, ValueMapT &VMap);
void createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
__isl_take isl_ast_build *Context,
ScopStmt *Stmt, VectorValueMapT &VMap,
std::vector<Value*> &IVS,
__isl_take isl_ast_build *Context, ScopStmt *Stmt,
ValueMapT &VMap);
void createSubstitutionsVector(
__isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context,
ScopStmt *Stmt, VectorValueMapT &VMap, std::vector<Value *> &IVS,
__isl_take isl_id *IteratorID);
void createIf(__isl_take isl_ast_node *If);
void createUserVector(__isl_take isl_ast_node *User,
std::vector<Value*> &IVS, __isl_take isl_id *IteratorID,
__isl_take isl_union_map *Schedule);
void createUserVector(
__isl_take isl_ast_node *User, std::vector<Value *> &IVS,
__isl_take isl_id *IteratorID, __isl_take isl_union_map *Schedule);
void createUser(__isl_take isl_ast_node *User);
void createBlock(__isl_take isl_ast_node *Block);
};
@ -671,10 +667,9 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
return NumberOfIterations + 1;
}
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
std::vector<Value*> &IVS,
__isl_take isl_id *IteratorID,
__isl_take isl_union_map *Schedule) {
void IslNodeBuilder::createUserVector(
__isl_take isl_ast_node *User, std::vector<Value *> &IVS,
__isl_take isl_id *IteratorID, __isl_take isl_union_map *Schedule) {
isl_id *Annotation = isl_ast_node_get_annotation(User);
assert(Annotation && "Vector user statement is not annotated");
@ -819,8 +814,8 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
// executed at least once, which will enable a lot of loop invariant
// code motion.
IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock,
Predicate);
IV =
createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock, Predicate);
IDToValue[IteratorID] = IV;
create(Body);
@ -853,8 +848,8 @@ void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
Builder.GetInsertPoint(), P);
BasicBlock *CondBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
CondBB->setName("polly.cond");
BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P);
MergeBB->setName("polly.merge");
@ -914,9 +909,10 @@ void IslNodeBuilder::createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
isl_ast_build_free(Context);
}
void IslNodeBuilder::createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
__isl_take isl_ast_build *Context, ScopStmt *Stmt, VectorValueMapT &VMap,
std::vector<Value*> &IVS, __isl_take isl_id *IteratorID) {
void IslNodeBuilder::createSubstitutionsVector(
__isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context,
ScopStmt *Stmt, VectorValueMapT &VMap, std::vector<Value *> &IVS,
__isl_take isl_id *IteratorID) {
int i = 0;
Value *OldValue = IDToValue[IteratorID];
@ -1033,8 +1029,7 @@ public:
return true;
}
// printScop override with an empty body: nothing is written to OS.
// The two forms below are the same definition as it appears before and
// after reformatting; only whitespace differs between them.
virtual void printScop(raw_ostream &OS) const {
}
virtual void printScop(raw_ostream &OS) const {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();

View File

@ -73,12 +73,9 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
return IV;
}
void OMPGenerator::createCallParallelLoopStart(Value *SubFunction,
Value *SubfunctionParam,
Value *NumberOfThreads,
Value *LowerBound,
Value *UpperBound,
Value *Stride) {
void OMPGenerator::createCallParallelLoopStart(
Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
Value *LowerBound, Value *UpperBound, Value *Stride) {
Module *M = getModule();
const char *Name = "GOMP_parallel_loop_runtime_start";
Function *F = M->getFunction(Name);
@ -88,35 +85,23 @@ void OMPGenerator::createCallParallelLoopStart(Value *SubFunction,
Type *LongTy = getIntPtrTy();
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
Type *Params[] = {
PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(),
Builder.getInt8PtrTy(),
false)),
Builder.getInt8PtrTy(),
Builder.getInt32Ty(),
LongTy,
LongTy,
LongTy,
};
Type *Params[] = { PointerType::getUnqual(FunctionType::get(
Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
LongTy, LongTy, };
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
F = Function::Create(Ty, Linkage, Name, M);
}
Value *Args[] = {
SubFunction,
SubfunctionParam,
NumberOfThreads,
LowerBound,
UpperBound,
Stride,
};
Value *Args[] = { SubFunction, SubfunctionParam, NumberOfThreads, LowerBound,
UpperBound, Stride, };
Builder.CreateCall(F, Args);
}
Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
Value *UpperBoundPtr) {
Value *
OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr) {
Module *M = getModule();
const char *Name = "GOMP_loop_runtime_next";
Function *F = M->getFunction(Name);
@ -126,23 +111,17 @@ Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
Type *Params[] = {
LongPtrTy,
LongPtrTy,
};
Type *Params[] = { LongPtrTy, LongPtrTy, };
FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
F = Function::Create(Ty, Linkage, Name, M);
}
Value *Args[] = {
LowerBoundPtr,
UpperBoundPtr,
};
Value *Args[] = { LowerBoundPtr, UpperBoundPtr, };
Value *Return = Builder.CreateCall(F, Args);
Return = Builder.CreateICmpNE(Return, Builder.CreateZExt(Builder.getFalse(),
Return->getType()));
Return = Builder.CreateICmpNE(
Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
return Return;
}
@ -219,9 +198,8 @@ Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value*> &Values) {
return Struct;
}
void OMPGenerator::extractValuesFromStruct(SetVector<Value*> OldValues,
Value *Struct,
ValueToValueMapTy &Map) {
void OMPGenerator::extractValuesFromStruct(
SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
for (unsigned i = 0; i < OldValues.size(); i++) {
Value *Address = Builder.CreateStructGEP(Struct, i);
Value *NewValue = Builder.CreateLoad(Address);
@ -229,10 +207,9 @@ void OMPGenerator::extractValuesFromStruct(SetVector<Value*> OldValues,
}
}
Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
SetVector<Value*> Data,
ValueToValueMapTy &Map,
Function **SubFunction) {
Value *OMPGenerator::createSubfunction(
Value *Stride, Value *StructData, SetVector<Value *> Data,
ValueToValueMapTy &Map, Function **SubFunction) {
Function *FN = createSubfunctionDefinition();
BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
@ -303,10 +280,9 @@ Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
return IV;
}
Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
Value *Stride,
SetVector<Value*> &Values,
ValueToValueMapTy &Map,
Value *OMPGenerator::createParallelLoop(
Value *LowerBound, Value *UpperBound, Value *Stride,
SetVector<Value *> &Values, ValueToValueMapTy &Map,
BasicBlock::iterator *LoopBody) {
Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
Function *SubFunction;
@ -319,15 +295,15 @@ Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
Builder.SetInsertPoint(PrevInsertPoint);
// Create call for GOMP_parallel_loop_runtime_start.
SubfunctionParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
"omp_data");
SubfunctionParam =
Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
NumberOfThreads = Builder.getInt32(0);
// Add one, as the upper bound provided by OpenMP is a < comparison,
// whereas the codegenForSequential function creates a <= comparison.
UpperBound = Builder.CreateAdd(UpperBound,
ConstantInt::get(getIntPtrTy(), 1));
UpperBound =
Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
LowerBound, UpperBound, Stride);

View File

@ -34,8 +34,8 @@ using namespace llvm;
using namespace polly;
PTXGenerator::PTXGenerator(IRBuilder<> &Builder, Pass *P,
const std::string &Triple):
Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1),
const std::string &Triple)
: Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1),
BlockWidth(1), BlockHeight(1), OutputBytes(0) {
InitializeGPUDataTypes();
@ -67,10 +67,9 @@ Function *PTXGenerator::createSubfunctionDefinition(int NumArgs) {
return FN;
}
void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
SetVector<Value*> &OriginalIVS,
PTXGenerator::ValueToValueMapTy &VMap,
Function **SubFunction) {
void PTXGenerator::createSubfunction(
SetVector<Value *> &UsedValues, SetVector<Value *> &OriginalIVS,
PTXGenerator::ValueToValueMapTy &VMap, Function **SubFunction) {
Function *FN = createSubfunctionDefinition(UsedValues.size());
Module *M = getModule();
LLVMContext &Context = FN->getContext();
@ -142,8 +141,8 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
Value *BlockID, *ThreadID;
switch (NumDims) {
case 1: {
Value *BlockSize = Builder.CreateMul(BlockWidth, BlockHeight,
"p_gpu_blocksize");
Value *BlockSize =
Builder.CreateMul(BlockWidth, BlockHeight, "p_gpu_blocksize");
BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i");
BlockID = Builder.CreateAdd(BlockID, BIDx);
BlockID = Builder.CreateMul(BlockID, BlockSize);
@ -183,11 +182,11 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
return;
}
assert(OriginalIVS.size() == Substitutions.size()
&& "The size of IVS should be equal to the size of substitutions.");
assert(OriginalIVS.size() == Substitutions.size() &&
"The size of IVS should be equal to the size of substitutions.");
for (unsigned i = 0; i < OriginalIVS.size(); ++i) {
VMap.insert(std::make_pair<Value*, Value*>(OriginalIVS[i],
Substitutions[i]));
VMap.insert(
std::make_pair<Value *, Value *>(OriginalIVS[i], Substitutions[i]));
}
Builder.CreateBr(ExitBB);
@ -202,10 +201,9 @@ void PTXGenerator::createSubfunction(SetVector<Value*> &UsedValues,
*SubFunction = FN;
}
void PTXGenerator::startGeneration(SetVector<Value*> &UsedValues,
SetVector<Value*> &OriginalIVS,
ValueToValueMapTy &VMap,
BasicBlock::iterator *LoopBody) {
void PTXGenerator::startGeneration(
SetVector<Value *> &UsedValues, SetVector<Value *> &OriginalIVS,
ValueToValueMapTy &VMap, BasicBlock::iterator *LoopBody) {
Function *SubFunction;
BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
createSubfunction(UsedValues, OriginalIVS, VMap, &SubFunction);
@ -213,9 +211,7 @@ void PTXGenerator::startGeneration(SetVector<Value*> &UsedValues,
Builder.SetInsertPoint(PrevInsertPoint);
}
// Returns the 64-bit integer type obtained from the IRBuilder held in the
// Builder member (Builder.getInt64Ty()).
// The two forms below are the same definition as it appears before and
// after reformatting; only whitespace differs between them.
IntegerType *PTXGenerator::getInt64Type() {
return Builder.getInt64Ty();
}
IntegerType *PTXGenerator::getInt64Type() { return Builder.getInt64Ty(); }
PointerType *PTXGenerator::getI8PtrType() {
return PointerType::getUnqual(Builder.getInt8Ty());
@ -320,9 +316,8 @@ void PTXGenerator::createCallGetPTXKernelEntry(Value *Entry, Value *Module,
Builder.CreateCall3(F, Entry, Module, Kernel);
}
void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData,
Value *DeviceData,
Value *Size) {
void PTXGenerator::createCallAllocateMemoryForHostAndDevice(
Value *HostData, Value *DeviceData, Value *Size) {
const char *Name = "polly_allocateMemoryForHostAndDevice";
Module *M = getModule();
Function *F = M->getFunction(Name);
@ -341,9 +336,8 @@ void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData,
Builder.CreateCall3(F, HostData, DeviceData, Size);
}
void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData,
Value *HostData,
Value *Size) {
void PTXGenerator::createCallCopyFromHostToDevice(
Value *DeviceData, Value *HostData, Value *Size) {
const char *Name = "polly_copyFromHostToDevice";
Module *M = getModule();
Function *F = M->getFunction(Name);
@ -362,9 +356,8 @@ void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData,
Builder.CreateCall3(F, DeviceData, HostData, Size);
}
void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData,
Value *DeviceData,
Value *Size) {
void PTXGenerator::createCallCopyFromDeviceToHost(
Value *HostData, Value *DeviceData, Value *Size) {
const char *Name = "polly_copyFromDeviceToHost";
Module *M = getModule();
Function *F = M->getFunction(Name);
@ -383,10 +376,8 @@ void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData,
Builder.CreateCall3(F, HostData, DeviceData, Size);
}
void PTXGenerator::createCallSetKernelParameters(Value *Kernel,
Value *BlockWidth,
Value *BlockHeight,
Value *DeviceData) {
void PTXGenerator::createCallSetKernelParameters(
Value *Kernel, Value *BlockWidth, Value *BlockHeight, Value *DeviceData) {
const char *Name = "polly_setKernelParameters";
Module *M = getModule();
Function *F = M->getFunction(Name);
@ -445,9 +436,8 @@ void PTXGenerator::createCallStartTimerByCudaEvent(Value *StartEvent,
Builder.CreateCall2(F, StartEvent, StopEvent);
}
void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent,
Value *StopEvent,
Value *Timer) {
void PTXGenerator::createCallStopTimerByCudaEvent(
Value *StartEvent, Value *StopEvent, Value *Timer) {
const char *Name = "polly_stopTimerByCudaEvent";
Module *M = getModule();
Function *F = M->getFunction(Name);
@ -466,10 +456,8 @@ void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent,
Builder.CreateCall3(F, StartEvent, StopEvent, Timer);
}
void PTXGenerator::createCallCleanupGPGPUResources(Value *HostData,
Value *DeviceData,
Value *Module,
Value *Context,
void PTXGenerator::createCallCleanupGPGPUResources(
Value *HostData, Value *DeviceData, Value *Module, Value *Context,
Value *Kernel) {
const char *Name = "polly_cleanupGPGPUResources";
llvm::Module *M = getModule();
@ -541,13 +529,12 @@ Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) {
}
}
Value *LLVMKernel = Builder.CreateGlobalStringPtr(LLVMKernelStr,
"llvm_kernel");
Value *LLVMKernel =
Builder.CreateGlobalStringPtr(LLVMKernelStr, "llvm_kernel");
Value *MCPU = Builder.CreateGlobalStringPtr("sm_10", "mcpu");
Value *Features = Builder.CreateGlobalStringPtr("", "cpu_features");
Function *GetDeviceKernel = Intrinsic::getDeclaration(M,
Intrinsic::codegen);
Function *GetDeviceKernel = Intrinsic::getDeclaration(M, Intrinsic::codegen);
return Builder.CreateCall3(GetDeviceKernel, LLVMKernel, MCPU, Features);
}
@ -596,22 +583,22 @@ void PTXGenerator::eraseUnusedFunctions(Function *SubFunction) {
void PTXGenerator::finishGeneration(Function *F) {
// Define data used by the GPURuntime library.
AllocaInst *PtrCUContext = Builder.CreateAlloca(getGPUContextPtrType(), 0,
"phcontext");
AllocaInst *PtrCUDevice = Builder.CreateAlloca(getGPUDevicePtrType(), 0,
"phdevice");
AllocaInst *PtrCUModule = Builder.CreateAlloca(getGPUModulePtrType(), 0,
"phmodule");
AllocaInst *PtrCUKernel = Builder.CreateAlloca(getGPUFunctionPtrType(), 0,
"phkernel");
AllocaInst *PtrCUStartEvent = Builder.CreateAlloca(getGPUEventPtrType(), 0,
"pstart_timer");
AllocaInst *PtrCUStopEvent = Builder.CreateAlloca(getGPUEventPtrType(), 0,
"pstop_timer");
AllocaInst *PtrDevData = Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0,
"pdevice_data");
AllocaInst *PtrHostData = Builder.CreateAlloca(getI8PtrType(), 0,
"phost_data");
AllocaInst *PtrCUContext =
Builder.CreateAlloca(getGPUContextPtrType(), 0, "phcontext");
AllocaInst *PtrCUDevice =
Builder.CreateAlloca(getGPUDevicePtrType(), 0, "phdevice");
AllocaInst *PtrCUModule =
Builder.CreateAlloca(getGPUModulePtrType(), 0, "phmodule");
AllocaInst *PtrCUKernel =
Builder.CreateAlloca(getGPUFunctionPtrType(), 0, "phkernel");
AllocaInst *PtrCUStartEvent =
Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstart_timer");
AllocaInst *PtrCUStopEvent =
Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstop_timer");
AllocaInst *PtrDevData =
Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0, "pdevice_data");
AllocaInst *PtrHostData =
Builder.CreateAlloca(getI8PtrType(), 0, "phost_data");
Type *FloatTy = llvm::Type::getFloatTy(getModule()->getContext());
AllocaInst *PtrElapsedTimes = Builder.CreateAlloca(FloatTy, 0, "ptimer");
@ -648,13 +635,11 @@ void PTXGenerator::finishGeneration(Function *F) {
// Record the end time.
LoadInst *CUStartEvent = Builder.CreateLoad(PtrCUStartEvent, "start_timer");
LoadInst *CUStopEvent = Builder.CreateLoad(PtrCUStopEvent, "stop_timer");
createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent,
PtrElapsedTimes);
createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent, PtrElapsedTimes);
// Cleanup all the resources used.
LoadInst *CUContext = Builder.CreateLoad(PtrCUContext, "cucontext");
createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext,
CUKernel);
createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext, CUKernel);
// Erase the ptx kernel and device subfunctions and ptx intrinsics from
// the current module.