From c14582f2767fe47a1f7e3ed3aafefb17366d95dc Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Tue, 5 Feb 2013 18:01:29 +0000 Subject: [PATCH] CodeGen: clang-format goodness The changed files are not yet clang-format clean, but we are getting close. llvm-svn: 174403 --- polly/lib/CodeGen/BlockGenerators.cpp | 208 ++++++++++------------ polly/lib/CodeGen/Cloog.cpp | 50 ++---- polly/lib/CodeGen/CodeGeneration.cpp | 219 ++++++++++++------------ polly/lib/CodeGen/IslAst.cpp | 54 +++--- polly/lib/CodeGen/IslCodeGeneration.cpp | 151 ++++++++-------- polly/lib/CodeGen/LoopGenerators.cpp | 92 ++++------ polly/lib/CodeGen/PTXGenerator.cpp | 157 ++++++++--------- 7 files changed, 415 insertions(+), 516 deletions(-) diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index ca2fdacac873..132f1f89e806 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -31,15 +31,13 @@ using namespace llvm; using namespace polly; static cl::opt -Aligned("enable-polly-aligned", - cl::desc("Assumed aligned memory accesses."), cl::Hidden, - cl::value_desc("OpenMP code generation enabled if true"), - cl::init(false), cl::ZeroOrMore); +Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."), + cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"), + cl::init(false), cl::ZeroOrMore); static cl::opt -SCEVCodegen("polly-codegen-scev", - cl::desc("Use SCEV based code generation."), cl::Hidden, - cl::init(false), cl::ZeroOrMore); +SCEVCodegen("polly-codegen-scev", cl::desc("Use SCEV based code generation."), + cl::Hidden, cl::init(false), cl::ZeroOrMore); /// The SCEVRewriter takes a scalar evolution expression and updates the /// following components: @@ -81,7 +79,7 @@ SCEVCodegen("polly-codegen-scev", /// - Instructions that reference operands already calculated within the /// basic block. /// - Store instructions -struct SCEVRewriter : public SCEVVisitor { +struct SCEVRewriter : public SCEVVisitor { public: static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap, ValueMapT &BBMap) { @@ -114,13 +112,10 @@ public: return Expr; } - - return SCEVVisitor::visit(Expr); + return SCEVVisitor::visit(Expr); } - const SCEV *visitConstant(const SCEVConstant *Constant) { - return Constant; - } + const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; } const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { const SCEV *Operand = visit(Expr->getOperand()); @@ -364,13 +359,13 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, // We assume constants never change. // This avoids map lookups for many calls to this function. if (isa(Old)) - return const_cast(Old); + return const_cast(Old); if (GlobalMap.count(Old)) { Value *New = GlobalMap[Old]; - if (Old->getType()->getScalarSizeInBits() - < New->getType()->getScalarSizeInBits()) + if (Old->getType()->getScalarSizeInBits() < + New->getType()->getScalarSizeInBits()) New = Builder.CreateTruncOrBitCast(New, Old->getType()); return New; @@ -381,11 +376,10 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, } if (SCEVCodegen && SE.isSCEVable(Old->getType())) - if (const SCEV *Scev = SE.getSCEV(const_cast(Old))) + if (const SCEV *Scev = SE.getSCEV(const_cast(Old))) if (!isa(Scev)) { - const SCEV *NewScev = SCEVRewriter::rewrite(Scev, - *Statement.getParent(), SE, - GlobalMap, BBMap); + const SCEV *NewScev = SCEVRewriter::rewrite( + Scev, *Statement.getParent(), SE, GlobalMap, BBMap); SCEVExpander Expander(SE, "polly"); Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), Builder.GetInsertPoint()); @@ -405,7 +399,7 @@ Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap, // Everything else is probably a scop-constant value defined as global, // function parameter or an instruction not within the scop. - return const_cast(Old); + return const_cast(Old); } void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, @@ -414,13 +408,14 @@ void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, // Replace old operands with the new ones. for (Instruction::const_op_iterator OI = Inst->op_begin(), - OE = Inst->op_end(); OI != OE; ++OI) { + OE = Inst->op_end(); + OI != OE; ++OI) { Value *OldOperand = *OI; Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap); if (!NewOperand) { - assert(!isa(NewInst) - && "Store instructions are always needed!"); + assert(!isa(NewInst) && + "Store instructions are always needed!"); delete NewInst; return; } @@ -435,9 +430,9 @@ void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap, NewInst->setName("p_" + Inst->getName()); } -std::vector BlockGenerator::getMemoryAccessIndex( - __isl_keep isl_map *AccessRelation, Value *BaseAddress, - ValueMapT &BBMap, ValueMapT &GlobalMap) { +std::vector BlockGenerator::getMemoryAccessIndex( + __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap, + ValueMapT &GlobalMap) { assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) && "Only single dimensional access functions supported"); @@ -456,7 +451,7 @@ std::vector BlockGenerator::getMemoryAccessIndex( Type *Ty = Builder.getInt64Ty(); OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true); - std::vector IndexArray; + std::vector IndexArray; Value *NullValue = Constant::getNullValue(Ty); IndexArray.push_back(NullValue); IndexArray.push_back(OffsetValue); @@ -464,20 +459,18 @@ std::vector BlockGenerator::getMemoryAccessIndex( } Value *BlockGenerator::getNewAccessOperand( - __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, - ValueMapT &BBMap, ValueMapT &GlobalMap) { - std::vector IndexArray = getMemoryAccessIndex(NewAccessRelation, - BaseAddress, - BBMap, GlobalMap); - Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray, - "p_newarrayidx_"); + __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap, + ValueMapT &GlobalMap) { + std::vector IndexArray = + getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap); + Value *NewOperand = + Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_"); return NewOperand; } -Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, - const Value *Pointer, - ValueMapT &BBMap, - ValueMapT &GlobalMap) { +Value *BlockGenerator::generateLocationAccessed( + const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap, + ValueMapT &GlobalMap) { MemoryAccess &Access = Statement.getAccessFor(Inst); isl_map *CurrentAccessRelation = Access.getAccessRelation(); isl_map *NewAccessRelation = Access.getNewAccessRelation(); @@ -490,9 +483,9 @@ Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, if (!NewAccessRelation) { NewPointer = getNewValue(Pointer, BBMap, GlobalMap); } else { - Value *BaseAddress = const_cast(Access.getBaseAddr()); - NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, - BBMap, GlobalMap); + Value *BaseAddress = const_cast(Access.getBaseAddr()); + NewPointer = + getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap); } isl_map_free(CurrentAccessRelation); @@ -500,23 +493,21 @@ Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst, return NewPointer; } -Value *BlockGenerator::generateScalarLoad(const LoadInst *Load, - ValueMapT &BBMap, - ValueMapT &GlobalMap) { +Value *BlockGenerator::generateScalarLoad( + const LoadInst *Load, ValueMapT &BBMap, ValueMapT &GlobalMap) { const Value *Pointer = Load->getPointerOperand(); const Instruction *Inst = dyn_cast(Load); Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap); - Value *ScalarLoad = Builder.CreateLoad(NewPointer, - Load->getName() + "_p_scalar_"); + Value *ScalarLoad = + Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); return ScalarLoad; } -Value *BlockGenerator::generateScalarStore(const StoreInst *Store, - ValueMapT &BBMap, - ValueMapT &GlobalMap) { +Value *BlockGenerator::generateScalarStore( + const StoreInst *Store, ValueMapT &BBMap, ValueMapT &GlobalMap) { const Value *Pointer = Store->getPointerOperand(); - Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap, - GlobalMap); + Value *NewPointer = + generateLocationAccessed(Store, Pointer, BBMap, GlobalMap); Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap); return Builder.CreateStore(ValueOperand, NewPointer); @@ -547,8 +538,8 @@ void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap, void BlockGenerator::copyBB(ValueMapT &GlobalMap) { BasicBlock *BB = Statement.getBasicBlock(); - BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), - Builder.GetInsertPoint(), P); + BasicBlock *CopyBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); CopyBB->setName("polly.stmt." + BB->getName()); Builder.SetInsertPoint(CopyBB->begin()); @@ -556,22 +547,19 @@ void BlockGenerator::copyBB(ValueMapT &GlobalMap) { for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) - copyInstruction(II, BBMap, GlobalMap); + copyInstruction(II, BBMap, GlobalMap); } -VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B, - VectorValueMapT &GlobalMaps, - ScopStmt &Stmt, - __isl_keep isl_map *Schedule, - Pass *P) - : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) { +VectorBlockGenerator::VectorBlockGenerator( + IRBuilder<> &B, VectorValueMapT &GlobalMaps, ScopStmt &Stmt, + __isl_keep isl_map *Schedule, Pass *P) + : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) { assert(GlobalMaps.size() > 1 && "Only one vector lane found"); assert(Schedule && "No statement domain provided"); } -Value *VectorBlockGenerator::getVectorValue(const Value *Old, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps) { +Value *VectorBlockGenerator::getVectorValue( + const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { if (VectorMap.count(Old)) return VectorMap[Old]; @@ -580,11 +568,9 @@ Value *VectorBlockGenerator::getVectorValue(const Value *Old, Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width)); for (int Lane = 0; Lane < Width; Lane++) - Vector = Builder.CreateInsertElement(Vector, - getNewValue(Old, - ScalarMaps[Lane], - GlobalMaps[Lane]), - Builder.getInt32(Lane)); + Vector = Builder.CreateInsertElement( + Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane]), + Builder.getInt32(Lane)); VectorMap[Old] = Vector; @@ -606,10 +592,10 @@ Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load, const Value *Pointer = Load->getPointerOperand(); Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); - Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, - "vector_ptr"); - LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, - Load->getName() + "_p_vec_full"); + Value *VectorPtr = + Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); + LoadInst *VecLoad = + Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full"); if (!Aligned) VecLoad->setAlignment(8); @@ -623,52 +609,47 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load, Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]); Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, Load->getName() + "_p_vec_p"); - LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr, - Load->getName() + "_p_splat_one"); + LoadInst *ScalarLoad = + Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one"); if (!Aligned) ScalarLoad->setAlignment(8); - Constant *SplatVector = - Constant::getNullValue(VectorType::get(Builder.getInt32Ty(), - getVectorWidth())); + Constant *SplatVector = Constant::getNullValue( + VectorType::get(Builder.getInt32Ty(), getVectorWidth())); - Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad, - SplatVector, - Load->getName() - + "_p_splat"); + Value *VectorLoad = Builder.CreateShuffleVector( + ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat"); return VectorLoad; } -Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load, - VectorValueMapT &ScalarMaps) { +Value *VectorBlockGenerator::generateUnknownStrideLoad( + const LoadInst *Load, VectorValueMapT &ScalarMaps) { int VectorWidth = getVectorWidth(); const Value *Pointer = Load->getPointerOperand(); VectorType *VectorType = VectorType::get( - dyn_cast(Pointer->getType())->getElementType(), VectorWidth); + dyn_cast(Pointer->getType())->getElementType(), VectorWidth); Value *Vector = UndefValue::get(VectorType); for (int i = 0; i < VectorWidth; i++) { Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]); - Value *ScalarLoad = Builder.CreateLoad(NewPointer, - Load->getName() + "_p_scalar_"); - Vector = Builder.CreateInsertElement(Vector, ScalarLoad, - Builder.getInt32(i), - Load->getName() + "_p_vec_"); + Value *ScalarLoad = + Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_"); + Vector = Builder.CreateInsertElement( + Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); } return Vector; } -void VectorBlockGenerator::generateLoad(const LoadInst *Load, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps) { +void VectorBlockGenerator::generateLoad( + const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || !VectorType::isValidElementType(Load->getType())) { for (int i = 0; i < getVectorWidth(); i++) - ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i], - GlobalMaps[i]); + ScalarMaps[i][Load] = + generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i]); return; } @@ -689,8 +670,8 @@ void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { int VectorWidth = getVectorWidth(); - Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, - ScalarMaps); + Value *NewOperand = + getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps); assert(isa(Inst) && "Can not generate vector code for instruction"); @@ -714,23 +695,22 @@ void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst, VectorMap[Inst] = NewInst; } -void VectorBlockGenerator::copyStore(const StoreInst *Store, - ValueMapT &VectorMap, - VectorValueMapT &ScalarMaps) { +void VectorBlockGenerator::copyStore( + const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { int VectorWidth = getVectorWidth(); MemoryAccess &Access = Statement.getAccessFor(Store); const Value *Pointer = Store->getPointerOperand(); - Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap, - ScalarMaps); + Value *Vector = + getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps); if (Access.isStrideOne(isl_map_copy(Schedule))) { Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth); Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]); - Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, - "vector_ptr"); + Value *VectorPtr = + Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); if (!Aligned) @@ -747,7 +727,8 @@ void VectorBlockGenerator::copyStore(const StoreInst *Store, bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap) { for (Instruction::const_op_iterator OI = Inst->op_begin(), - OE = Inst->op_end(); OI != OE; ++OI) + OE = Inst->op_end(); + OI != OE; ++OI) if (VectorMap.count(*OI)) return true; return false; @@ -760,7 +741,8 @@ bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst, int VectorWidth = getVectorWidth(); for (Instruction::const_op_iterator OI = Inst->op_begin(), - OE = Inst->op_end(); OI != OE; ++OI) { + OE = Inst->op_end(); + OI != OE; ++OI) { ValueMapT::iterator VecOp = VectorMap.find(*OI); if (VecOp == VectorMap.end()) @@ -810,9 +792,7 @@ void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst, VectorMap[Inst] = Vector; } -int VectorBlockGenerator::getVectorWidth() { - return GlobalMaps.size(); -} +int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); } void VectorBlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &VectorMap, @@ -855,8 +835,8 @@ void VectorBlockGenerator::copyInstruction(const Instruction *Inst, void VectorBlockGenerator::copyBB() { BasicBlock *BB = Statement.getBasicBlock(); - BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), - Builder.GetInsertPoint(), P); + BasicBlock *CopyBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); CopyBB->setName("polly.stmt." + BB->getName()); Builder.SetInsertPoint(CopyBB->begin()); @@ -877,7 +857,7 @@ void VectorBlockGenerator::copyBB() { VectorValueMapT ScalarBlockMap(getVectorWidth()); ValueMapT VectorBlockMap; - for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) - copyInstruction(II, VectorBlockMap, ScalarBlockMap); + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) + copyInstruction(II, VectorBlockMap, ScalarBlockMap); } diff --git a/polly/lib/CodeGen/Cloog.cpp b/polly/lib/CodeGen/Cloog.cpp index 56745491729d..4f0bef4f28ac 100644 --- a/polly/lib/CodeGen/Cloog.cpp +++ b/polly/lib/CodeGen/Cloog.cpp @@ -97,9 +97,7 @@ public: //close(FD[1]); } - FILE *getInputFile() { - return input; - } + FILE *getInputFile() { return input; } void closeInput() { fclose(input); @@ -141,9 +139,7 @@ void Cloog::pprint(raw_ostream &OS) { } /// Create the Cloog AST from this program. -struct clast_root *Cloog::getClast() { - return (clast_root*)ClastRoot; -} +struct clast_root *Cloog::getClast() { return (clast_root *)ClastRoot; } void Cloog::buildCloogOptions() { Options = cloog_options_malloc(State); @@ -217,7 +213,7 @@ CloogInput *Cloog::buildCloogInput() { } void ClastVisitor::visit(const clast_stmt *stmt) { - if (CLAST_STMT_IS_A(stmt, stmt_root)) + if (CLAST_STMT_IS_A(stmt, stmt_root)) assert(false && "No second root statement expected"); else if (CLAST_STMT_IS_A(stmt, stmt_ass)) return visitAssignment((const clast_assignment *)stmt); @@ -234,20 +230,13 @@ void ClastVisitor::visit(const clast_stmt *stmt) { visit(stmt->next); } -void ClastVisitor::visitAssignment(const clast_assignment *stmt) { -} +void ClastVisitor::visitAssignment(const clast_assignment *stmt) {} -void ClastVisitor::visitBlock(const clast_block *stmt) { - visit(stmt->body); -} +void ClastVisitor::visitBlock(const clast_block *stmt) { visit(stmt->body); } -void ClastVisitor::visitFor(const clast_for *stmt) { - visit(stmt->body); -} +void ClastVisitor::visitFor(const clast_for *stmt) { visit(stmt->body); } -void ClastVisitor::visitGuard(const clast_guard *stmt) { - visit(stmt->then); -} +void ClastVisitor::visitGuard(const clast_guard *stmt) { visit(stmt->then); } } // End namespace polly. @@ -295,7 +284,7 @@ bool CloogExporter::runOnScop(Scop &S) { std::string Filename = getFileName(&R); errs() << "Writing Scop '" << R.getNameStr() << "' in function '" - << FunctionName << "' to '" << Filename << "'...\n"; + << FunctionName << "' to '" << Filename << "'...\n"; FILE *F = fopen(Filename.c_str(), "w"); C.dump(F); @@ -310,29 +299,20 @@ void CloogExporter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); } -static RegisterPass A("polly-export-cloog", - "Polly - Export the Cloog input file" - " (Writes a .cloog file for each Scop)" - ); +static RegisterPass +A("polly-export-cloog", "Polly - Export the Cloog input file" + " (Writes a .cloog file for each Scop)"); -llvm::Pass *polly::createCloogExporterPass() { - return new CloogExporter(); -} +llvm::Pass *polly::createCloogExporterPass() { return new CloogExporter(); } /// Write a .cloog input file -void CloogInfo::dump(FILE *F) { - C->dump(F); -} +void CloogInfo::dump(FILE *F) { C->dump(F); } /// Print a source code representation of the program. -void CloogInfo::pprint(llvm::raw_ostream &OS) { - C->pprint(OS); -} +void CloogInfo::pprint(llvm::raw_ostream &OS) { C->pprint(OS); } /// Create the Cloog AST from this program. -const struct clast_root *CloogInfo::getClast() { - return C->getClast(); -} +const struct clast_root *CloogInfo::getClast() { return C->getClast(); } void CloogInfo::releaseMemory() { if (C) { diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 2b71154a7e92..8398c41da27a 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -61,25 +61,22 @@ struct isl_set; namespace polly { static cl::opt -OpenMP("enable-polly-openmp", - cl::desc("Generate OpenMP parallel code"), cl::Hidden, - cl::value_desc("OpenMP code generation enabled if true"), +OpenMP("enable-polly-openmp", cl::desc("Generate OpenMP parallel code"), + cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"), cl::init(false), cl::ZeroOrMore); #ifdef GPU_CODEGEN static cl::opt -GPGPU("enable-polly-gpgpu", - cl::desc("Generate GPU parallel code"), cl::Hidden, - cl::value_desc("GPGPU code generation enabled if true"), - cl::init(false), cl::ZeroOrMore); +GPGPU("enable-polly-gpgpu", cl::desc("Generate GPU parallel code"), cl::Hidden, + cl::value_desc("GPGPU code generation enabled if true"), cl::init(false), + cl::ZeroOrMore); -static cl::opt -GPUTriple("polly-gpgpu-triple", - cl::desc("Target triple for GPU code generation"), - cl::Hidden, cl::init("")); +static cl::opt GPUTriple( + "polly-gpgpu-triple", cl::desc("Target triple for GPU code generation"), + cl::Hidden, cl::init("")); #endif /* GPU_CODEGEN */ -typedef DenseMap CharMapT; +typedef DenseMap CharMapT; /// Class to generate LLVM-IR that calculates the value of a clast_expr. class ClastExpCodeGen { @@ -140,58 +137,52 @@ Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) { switch (e->type) { case clast_bin_mod: return Builder.CreateSRem(LHS, RHS); - case clast_bin_fdiv: - { - // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d - Value *One = ConstantInt::get(Ty, 1); - Value *Zero = ConstantInt::get(Ty, 0); - Value *Sum1 = Builder.CreateSub(LHS, RHS); - Value *Sum2 = Builder.CreateAdd(Sum1, One); - Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); - Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); - return Builder.CreateSDiv(Dividend, RHS); - } - case clast_bin_cdiv: - { - // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d - Value *One = ConstantInt::get(Ty, 1); - Value *Zero = ConstantInt::get(Ty, 0); - Value *Sum1 = Builder.CreateAdd(LHS, RHS); - Value *Sum2 = Builder.CreateSub(Sum1, One); - Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); - Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); - return Builder.CreateSDiv(Dividend, RHS); - } + case clast_bin_fdiv: { + // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d + Value *One = ConstantInt::get(Ty, 1); + Value *Zero = ConstantInt::get(Ty, 0); + Value *Sum1 = Builder.CreateSub(LHS, RHS); + Value *Sum2 = Builder.CreateAdd(Sum1, One); + Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); + Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); + return Builder.CreateSDiv(Dividend, RHS); + } + case clast_bin_cdiv: { + // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d + Value *One = ConstantInt::get(Ty, 1); + Value *Zero = ConstantInt::get(Ty, 0); + Value *Sum1 = Builder.CreateAdd(LHS, RHS); + Value *Sum2 = Builder.CreateSub(Sum1, One); + Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); + Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); + return Builder.CreateSDiv(Dividend, RHS); + } case clast_bin_div: return Builder.CreateSDiv(LHS, RHS); - }; + } llvm_unreachable("Unknown clast binary expression type"); } Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) { - assert(( r->type == clast_red_min - || r->type == clast_red_max - || r->type == clast_red_sum) - && "Clast reduction type not supported"); + assert((r->type == clast_red_min || r->type == clast_red_max || + r->type == clast_red_sum) && "Clast reduction type not supported"); Value *old = codegen(r->elts[0], Ty); for (int i = 1; i < r->n; ++i) { Value *exprValue = codegen(r->elts[i], Ty); switch (r->type) { - case clast_red_min: - { - Value *cmp = Builder.CreateICmpSLT(old, exprValue); - old = Builder.CreateSelect(cmp, old, exprValue); - break; - } - case clast_red_max: - { - Value *cmp = Builder.CreateICmpSGT(old, exprValue); - old = Builder.CreateSelect(cmp, old, exprValue); - break; - } + case clast_red_min: { + Value *cmp = Builder.CreateICmpSLT(old, exprValue); + old = Builder.CreateSelect(cmp, old, exprValue); + break; + } + case clast_red_max: { + Value *cmp = Builder.CreateICmpSGT(old, exprValue); + old = Builder.CreateSelect(cmp, old, exprValue); + break; + } case clast_red_sum: old = Builder.CreateAdd(old, exprValue); break; @@ -205,7 +196,7 @@ ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap) : Builder(B), IVS(IVMap) {} Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) { - switch(e->type) { + switch (e->type) { case clast_expr_name: return codegen((const clast_name *)e, Ty); case clast_expr_term: @@ -260,11 +251,11 @@ private: unsigned Dimension, int vectorDim, std::vector *VectorVMap = 0); - void codegenSubstitutions(const clast_stmt *Assignment, - ScopStmt *Statement, int vectorDim = 0, + void codegenSubstitutions(const clast_stmt *Assignment, ScopStmt *Statement, + int vectorDim = 0, std::vector *VectorVMap = 0); - void codegen(const clast_user_stmt *u, std::vector *IVS = NULL, + void codegen(const clast_user_stmt *u, std::vector *IVS = NULL, const char *iterator = NULL, isl_set *scatteringDomain = 0); void codegen(const clast_block *b); @@ -276,7 +267,7 @@ private: /// /// Create a list of values that has to be stored into the OpenMP subfuncition /// structure. - SetVector getOMPValues(const clast_stmt *Body); + SetVector getOMPValues(const clast_stmt *Body); /// @brief Update ClastVars and ValueMap according to a value map. /// @@ -295,7 +286,7 @@ private: /// Create a list of values that will be set to be parameters of the GPGPU /// subfunction. These parameters represent device memory base addresses /// and the size in bytes. - SetVector getGPUValues(unsigned &OutputBytes); + SetVector getGPUValues(unsigned &OutputBytes); /// @brief Create a GPU parallel for loop. /// @@ -304,10 +295,9 @@ private: void codegenForGPGPU(const clast_for *F); /// @brief Get innermost for loop. - const clast_stmt *getScheduleInfo(const clast_for *F, - std::vector &NumIters, - unsigned &LoopDepth, - unsigned &NonPLoopDepth); + const clast_stmt * + getScheduleInfo(const clast_for *F, std::vector &NumIters, + unsigned &LoopDepth, unsigned &NonPLoopDepth); #endif /* GPU_CODEGEN */ /// @brief Check if a loop is parallel @@ -379,15 +369,15 @@ void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt, ValueMap[PN] = RHS; } -void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, - ScopStmt *Statement, int vectorDim, - std::vector *VectorVMap) { +void ClastStmtCodeGen::codegenSubstitutions( + const clast_stmt *Assignment, ScopStmt *Statement, int vectorDim, + std::vector *VectorVMap) { int Dimension = 0; while (Assignment) { assert(CLAST_STMT_IS_A(Assignment, stmt_ass) && "Substitions are expected to be assignments"); - codegen((const clast_assignment *) Assignment, Statement, Dimension, + codegen((const clast_assignment *)Assignment, Statement, Dimension, vectorDim, VectorVMap); Assignment = Assignment->next; Dimension++; @@ -397,18 +387,19 @@ void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, // Takes the cloog specific domain and translates it into a map Statement -> // PartialSchedule, where the PartialSchedule contains all the dimensions that // have been code generated up to this point. -static __isl_give isl_map *extractPartialSchedule(ScopStmt *Statement, - isl_set *Domain) { +static __isl_give isl_map * +extractPartialSchedule(ScopStmt *Statement, isl_set *Domain) { isl_map *Schedule = Statement->getScattering(); int ScheduledDimensions = isl_set_dim(Domain, isl_dim_set); - int UnscheduledDimensions = isl_map_dim(Schedule, isl_dim_out) - ScheduledDimensions; + int UnscheduledDimensions = + isl_map_dim(Schedule, isl_dim_out) - ScheduledDimensions; return isl_map_project_out(Schedule, isl_dim_out, ScheduledDimensions, UnscheduledDimensions); } void ClastStmtCodeGen::codegen(const clast_user_stmt *u, - std::vector *IVS , const char *iterator, + std::vector *IVS, const char *iterator, isl_set *Domain) { ScopStmt *Statement = (ScopStmt *)u->statement->usr; @@ -427,7 +418,7 @@ void ClastStmtCodeGen::codegen(const clast_user_stmt *u, if (IVS) { assert(u->substitutions && "Substitutions expected!"); int i = 0; - for (std::vector::iterator II = IVS->begin(), IE = IVS->end(); + for (std::vector::iterator II = IVS->begin(), IE = IVS->end(); II != IE; ++II) { ClastVars[iterator] = *II; codegenSubstitutions(u->substitutions, Statement, i, &VectorMap); @@ -473,7 +464,7 @@ void ClastStmtCodeGen::codegenForSequential(const clast_for *f) { class ParameterVisitor : public ClastVisitor { std::set Values; public: - ParameterVisitor() : ClastVisitor(), Values() { } + ParameterVisitor() : ClastVisitor(), Values() {} void visitUser(const clast_user_stmt *Stmt) { const ScopStmt *S = static_cast(Stmt->statement->usr); @@ -484,7 +475,8 @@ public: ++BI) { const Instruction &Inst = *BI; for (Instruction::const_op_iterator II = Inst.op_begin(), - IE = Inst.op_end(); II != IE; ++II) { + IE = Inst.op_end(); + II != IE; ++II) { Value *SrcVal = *II; if (Instruction *OpInst = dyn_cast(SrcVal)) @@ -500,15 +492,15 @@ public: // Iterator to iterate over the values found. typedef std::set::const_iterator const_iterator; inline const_iterator begin() const { return Values.begin(); } - inline const_iterator end() const { return Values.end(); } + inline const_iterator end() const { return Values.end(); } }; -SetVector ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) { - SetVector Values; +SetVector ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) { + SetVector Values; // The clast variables - for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); - I != E; I++) + for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); I != E; + I++) Values.insert(I->second); // Find the temporaries that are referenced in the clast statements' @@ -529,17 +521,18 @@ SetVector ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) { } void ClastStmtCodeGen::updateWithValueMap( - OMPGenerator::ValueToValueMapTy &VMap) { - std::set Inserted; + OMPGenerator::ValueToValueMapTy &VMap) { + std::set Inserted; - for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); - I != E; I++) { + for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); I != E; + I++) { ClastVars[I->first] = VMap[I->second]; Inserted.insert(I->second); } for (OMPGenerator::ValueToValueMapTy::iterator I = VMap.begin(), - E = VMap.end(); I != E; ++I) { + E = VMap.end(); + I != E; ++I) { if (Inserted.count(I->first)) continue; @@ -549,11 +542,11 @@ void ClastStmtCodeGen::updateWithValueMap( static void clearDomtree(Function *F, DominatorTree &DT) { DomTreeNode *N = DT.getNode(&F->getEntryBlock()); - std::vector Nodes; - for (po_iterator I = po_begin(N), E = po_end(N); I != E; ++I) + std::vector Nodes; + for (po_iterator I = po_begin(N), E = po_end(N); I != E; ++I) Nodes.push_back(I->getBlock()); - for (std::vector::iterator I = Nodes.begin(), E = Nodes.end(); + for (std::vector::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) DT.eraseNode(*I); } @@ -562,7 +555,7 @@ void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { Value *Stride, *LB, *UB, *IV; BasicBlock::iterator LoopBody; IntegerType *IntPtrTy = getIntPtrTy(); - SetVector Values; + SetVector Values; OMPGenerator::ValueToValueMapTy VMap; OMPGenerator OMPGen(Builder, P); @@ -608,16 +601,17 @@ static unsigned getArraySizeInBytes(const ArrayType *AT) { return Bytes; } -SetVector ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) { - SetVector Values; +SetVector ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) { + SetVector Values; OutputBytes = 0; // Record the memory reference base addresses. for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; - for (SmallVector::iterator I = Stmt->memacc_begin(), - E = Stmt->memacc_end(); I != E; ++I) { - Value *BaseAddr = const_cast((*I)->getBaseAddr()); + for (SmallVector::iterator I = Stmt->memacc_begin(), + E = Stmt->memacc_end(); + I != E; ++I) { + Value *BaseAddr = const_cast((*I)->getBaseAddr()); Values.insert((BaseAddr)); // FIXME: we assume that there is one and only one array to be written @@ -627,8 +621,8 @@ SetVector ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) { ++NumWrites; assert(NumWrites <= 1 && "We support at most one array to be written in a SCoP."); - if (const PointerType * PT = - dyn_cast(BaseAddr->getType())) { + if (const PointerType *PT = + dyn_cast(BaseAddr->getType())) { Type *T = PT->getArrayElementType(); const ArrayType *ATy = dyn_cast(T); OutputBytes = getArraySizeInBytes(ATy); @@ -640,10 +634,9 @@ SetVector ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) { return Values; } -const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F, - std::vector &NumIters, - unsigned &LoopDepth, - unsigned &NonPLoopDepth) { +const clast_stmt *ClastStmtCodeGen::getScheduleInfo( + const clast_for *F, std::vector &NumIters, unsigned &LoopDepth, + unsigned &NonPLoopDepth) { clast_stmt *Stmt = (clast_stmt *)F; const clast_for *Result; bool NonParaFlag = false; @@ -652,7 +645,7 @@ const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F, while (Stmt) { if (CLAST_STMT_IS_A(Stmt, stmt_for)) { - const clast_for *T = (clast_for *) Stmt; + const clast_for *T = (clast_for *)Stmt; if (isParallelFor(T)) { if (!NonParaFlag) { NumIters.push_back(getNumberOfIterations(T)); @@ -672,8 +665,8 @@ const clast_stmt *ClastStmtCodeGen::getScheduleInfo(const clast_for *F, "The loops should be tiled into 4-depth parallel loops and an " "innermost non-parallel one (if exist)."); NonPLoopDepth = LoopDepth - NumIters.size(); - assert(NonPLoopDepth <= 1 - && "We support only one innermost non-parallel loop currently."); + assert(NonPLoopDepth <= 1 && + "We support only one innermost non-parallel loop currently."); return (const clast_stmt *)Result->body; } @@ -690,8 +683,8 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) { // Get original IVS and ScopStmt unsigned TiledLoopDepth, NonPLoopDepth; - const clast_stmt *InnerStmt = getScheduleInfo(F, NumIterations, - TiledLoopDepth, NonPLoopDepth); + const clast_stmt *InnerStmt = + getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth); const clast_stmt *TmpStmt; const clast_user_stmt *U; const clast_for *InnerFor; @@ -700,8 +693,8 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) { TmpStmt = InnerFor->body; } else TmpStmt = InnerStmt; - U = (const clast_user_stmt *) TmpStmt; - ScopStmt *Statement = (ScopStmt *) U->statement->usr; + U = (const clast_user_stmt *)TmpStmt; + ScopStmt *Statement = (ScopStmt *)U->statement->usr; for (unsigned i = 0; i < Statement->getNumIterators() - NonPLoopDepth; i++) { const Value *IV = Statement->getInductionVariableForDimension(i); IVS.insert(const_cast(IV)); @@ -726,7 +719,7 @@ void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) { CmpInst::ICMP_SLE); const Value *OldIV_ = Statement->getInductionVariableForDimension(2); Value *OldIV = const_cast(OldIV_); - VMap.insert(std::make_pair(OldIV, IV)); + VMap.insert(std::make_pair(OldIV, IV)); } updateWithValueMap(VMap); @@ -787,11 +780,11 @@ void ClastStmtCodeGen::codegenForVector(const clast_for *F) { Stride = Stride.zext(LoopIVType->getBitWidth()); Value *StrideValue = ConstantInt::get(LoopIVType, Stride); - std::vector IVS(VectorWidth); + std::vector IVS(VectorWidth); IVS[0] = LB; for (int i = 1; i < VectorWidth; i++) - IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); + IVS[i] = Builder.CreateAdd(IVS[i - 1], StrideValue, "p_vector_iv"); isl_set *Domain = isl_set_from_cloog_domain(F->domain); @@ -872,8 +865,8 @@ void ClastStmtCodeGen::codegen(const clast_guard *g) { Function *F = Builder.GetInsertBlock()->getParent(); LLVMContext &Context = F->getContext(); - BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), - Builder.GetInsertPoint(), P); + BasicBlock *CondBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); CondBB->setName("polly.cond"); BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P); MergeBB->setName("polly.merge"); @@ -905,7 +898,7 @@ void ClastStmtCodeGen::codegen(const clast_guard *g) { } void ClastStmtCodeGen::codegen(const clast_stmt *stmt) { - if (CLAST_STMT_IS_A(stmt, stmt_root)) + if (CLAST_STMT_IS_A(stmt, stmt_root)) assert(false && "No second root statement expected"); else if (CLAST_STMT_IS_A(stmt, stmt_ass)) codegen((const clast_assignment *)stmt); @@ -946,7 +939,7 @@ void ClastStmtCodeGen::codegen(const clast_root *r) { parallelCodeGeneration = false; - const clast_stmt *stmt = (const clast_stmt*) r; + const clast_stmt *stmt = (const clast_stmt *)r; if (stmt->next) codegen(stmt->next); } @@ -964,7 +957,6 @@ public: CodeGeneration() : ScopPass(ID) {} - bool runOnScop(Scop &S) { ParallelLoops.clear(); @@ -986,7 +978,8 @@ public: virtual void printScop(raw_ostream &OS) const { for (std::vector::const_iterator PI = ParallelLoops.begin(), - PE = ParallelLoops.end(); PI != PE; ++PI) + PE = ParallelLoops.end(); + PI != PE; ++PI) OS << "Parallel loop with iterator '" << *PI << "' generated\n"; } diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index fe5da775eae3..fa3c8f68896d 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -78,10 +78,9 @@ struct AstBuildUserInfo { }; // Print a loop annotated with OpenMP or vector pragmas. -static __isl_give isl_printer * -printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer, - __isl_take isl_ast_print_options *PrintOptions, - IslAstUser *Info) { +static __isl_give isl_printer *printParallelFor( + __isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer, + __isl_take isl_ast_print_options *PrintOptions, IslAstUser *Info) { if (Info) { if (Info->IsInnermostParallel) { Printer = isl_printer_start_line(Printer); @@ -106,7 +105,7 @@ printFor(__isl_take isl_printer *Printer, if (!Id) return isl_ast_node_for_print(Node, Printer, PrintOptions); - struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id); + struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Id); Printer = printParallelFor(Node, Printer, PrintOptions, Info); isl_id_free(Id); return Printer; @@ -115,7 +114,7 @@ printFor(__isl_take isl_printer *Printer, // Allocate an AstNodeInfo structure and initialize it with default values. static struct IslAstUser *allocateIslAstUser() { struct IslAstUser *NodeInfo; - NodeInfo = (struct IslAstUser *) malloc(sizeof(struct IslAstUser)); + NodeInfo = (struct IslAstUser *)malloc(sizeof(struct IslAstUser)); NodeInfo->PMA = 0; NodeInfo->Context = 0; NodeInfo->IsOutermostParallel = 0; @@ -125,7 +124,7 @@ static struct IslAstUser *allocateIslAstUser() { // Free the AstNodeInfo structure. static void freeIslAstUser(void *Ptr) { - struct IslAstUser *UserStruct = (struct IslAstUser *) Ptr; + struct IslAstUser *UserStruct = (struct IslAstUser *)Ptr; isl_ast_build_free(UserStruct->Context); isl_pw_multi_aff_free(UserStruct->PMA); free(UserStruct); @@ -202,9 +201,9 @@ static void markOpenmpParallel(__isl_keep isl_ast_build *Build, // // - Detection of openmp parallel loops // -static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build, - void *User) { - struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User; +static __isl_give isl_id * +astBuildBeforeFor(__isl_keep isl_ast_build *Build, void *User) { + struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *)User; struct IslAstUser *NodeInfo = allocateIslAstUser(); isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo); Id = isl_id_set_free_user(Id, freeIslAstUser); @@ -262,13 +261,13 @@ static bool containsLoops(__isl_take isl_ast_node *Node) { // that is marked as openmp parallel. // static __isl_give isl_ast_node * -astBuildAfterFor(__isl_take isl_ast_node *Node, - __isl_keep isl_ast_build *Build, void *User) { +astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build, + void *User) { isl_id *Id = isl_ast_node_get_annotation(Node); if (!Id) return Node; - struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id); - struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User; + struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Id); + struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *)User; if (Info) { if (Info->IsOutermostParallel) @@ -285,14 +284,13 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, } static __isl_give isl_ast_node * -AtEachDomain(__isl_take isl_ast_node *Node, - __isl_keep isl_ast_build *Context, void *User) -{ +AtEachDomain(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Context, + void *User) { struct IslAstUser *Info = NULL; isl_id *Id = isl_ast_node_get_annotation(Node); if (Id) - Info = (struct IslAstUser *) isl_id_get_user(Id); + Info = (struct IslAstUser *)isl_id_get_user(Id); if (!Info) { // Allocate annotations once: parallel for detection might have already @@ -360,16 +358,14 @@ __isl_give isl_union_map *IslAst::getSchedule() { isl_map *StmtSchedule = Stmt->getScattering(); StmtSchedule = isl_map_intersect_domain(StmtSchedule, Stmt->getDomain()); - Schedule = isl_union_map_union(Schedule, - isl_union_map_from_map(StmtSchedule)); + Schedule = + isl_union_map_union(Schedule, isl_union_map_from_map(StmtSchedule)); } return Schedule; } -IslAst::~IslAst() { - isl_ast_node_free(Root); -} +IslAst::~IslAst() { isl_ast_node_free(Root); } /// Print a C like representation of the program. void IslAst::pprint(llvm::raw_ostream &OS) { @@ -390,13 +386,9 @@ void IslAst::pprint(llvm::raw_ostream &OS) { } /// Create the isl_ast from this program. -__isl_give isl_ast_node *IslAst::getAst() { - return isl_ast_node_copy(Root); -} +__isl_give isl_ast_node *IslAst::getAst() { return isl_ast_node_copy(Root); } -void IslAstInfo::pprint(llvm::raw_ostream &OS) { - Ast->pprint(OS); -} +void IslAstInfo::pprint(llvm::raw_ostream &OS) { Ast->pprint(OS); } void IslAstInfo::releaseMemory() { if (Ast) { @@ -418,9 +410,7 @@ bool IslAstInfo::runOnScop(Scop &Scop) { return false; } -__isl_give isl_ast_node *IslAstInfo::getAst() { - return Ast->getAst(); -} +__isl_give isl_ast_node *IslAstInfo::getAst() { return Ast->getAst(); } void IslAstInfo::printScop(raw_ostream &OS) const { Function *F = S->getRegion().getEntry()->getParent(); diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp index 9b3b618de296..b9837f9477b0 100644 --- a/polly/lib/CodeGen/IslCodeGeneration.cpp +++ b/polly/lib/CodeGen/IslCodeGeneration.cpp @@ -93,8 +93,8 @@ Function *RuntimeDebugBuilder::getPrintF() { if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), - Builder.getInt8PtrTy(), true); + FunctionType *Ty = + FunctionType::get(Builder.getInt32Ty(), Builder.getInt8PtrTy(), true); F = Function::Create(Ty, Linkage, Name, M); } @@ -108,8 +108,8 @@ void RuntimeDebugBuilder::createFlush() { if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), - Builder.getInt8PtrTy(), false); + FunctionType *Ty = + FunctionType::get(Builder.getInt32Ty(), Builder.getInt8PtrTy(), false); F = Function::Create(Ty, Linkage, Name, M); } @@ -148,7 +148,7 @@ public: private: IRBuilder<> &Builder; - std::map &IDToValue; + std::map &IDToValue; Value *createOp(__isl_take isl_ast_expr *Expr); Value *createOpUnary(__isl_take isl_ast_expr *Expr); @@ -213,18 +213,16 @@ Value *IslExprBuilder::createOpNAry(__isl_take isl_ast_expr *Expr) { default: llvm_unreachable("This is no n-ary isl ast expression"); - case isl_ast_op_max: - { - Value *Cmp = Builder.CreateICmpSGT(V, OpV); - V = Builder.CreateSelect(Cmp, V, OpV); - continue; - } - case isl_ast_op_min: - { - Value *Cmp = Builder.CreateICmpSLT(V, OpV); - V = Builder.CreateSelect(Cmp, V, OpV); - continue; - } + case isl_ast_op_max: { + Value *Cmp = Builder.CreateICmpSGT(V, OpV); + V = Builder.CreateSelect(Cmp, V, OpV); + continue; + } + case isl_ast_op_min: { + Value *Cmp = Builder.CreateICmpSLT(V, OpV); + V = Builder.CreateSelect(Cmp, V, OpV); + continue; + } } } @@ -299,21 +297,20 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) { case isl_ast_op_pdiv_q: // Dividend is non-negative Res = Builder.CreateSDiv(LHS, RHS); break; - case isl_ast_op_fdiv_q: // Round towards -infty - { - // TODO: Review code and check that this calculation does not yield - // incorrect overflow in some bordercases. - // - // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d - Value *One = ConstantInt::get(MaxType, 1); - Value *Zero = ConstantInt::get(MaxType, 0); - Value *Sum1 = Builder.CreateSub(LHS, RHS); - Value *Sum2 = Builder.CreateAdd(Sum1, One); - Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); - Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); - Res = Builder.CreateSDiv(Dividend, RHS); - break; - } + case isl_ast_op_fdiv_q: { // Round towards -infty + // TODO: Review code and check that this calculation does not yield + // incorrect overflow in some bordercases. + // + // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d + Value *One = ConstantInt::get(MaxType, 1); + Value *Zero = ConstantInt::get(MaxType, 0); + Value *Sum1 = Builder.CreateSub(LHS, RHS); + Value *Sum2 = Builder.CreateAdd(Sum1, One); + Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); + Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); + Res = Builder.CreateSDiv(Dividend, RHS); + break; + } case isl_ast_op_pdiv_r: // Dividend is non-negative Res = Builder.CreateSRem(LHS, RHS); break; @@ -556,7 +553,7 @@ private: // This maps an isl_id* to the Value* it has in the generated program. For now // on, the only isl_ids that are stored here are the newly calculated loop // ivs. - std::map IDToValue; + std::map IDToValue; // Extract the upper bound of this loop // @@ -577,8 +574,8 @@ private: // of loop iterations. // // 3. With the existing code, upper bounds have been easier to implement. - __isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For, - CmpInst::Predicate &Predicate); + __isl_give isl_ast_expr * + getUpperBound(__isl_keep isl_ast_node *For, CmpInst::Predicate &Predicate); unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); @@ -586,23 +583,22 @@ private: void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); void createSubstitutions(__isl_take isl_pw_multi_aff *PMA, - __isl_take isl_ast_build *Context, - ScopStmt *Stmt, ValueMapT &VMap); - void createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA, - __isl_take isl_ast_build *Context, - ScopStmt *Stmt, VectorValueMapT &VMap, - std::vector &IVS, - __isl_take isl_id *IteratorID); + __isl_take isl_ast_build *Context, ScopStmt *Stmt, + ValueMapT &VMap); + void createSubstitutionsVector( + __isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context, + ScopStmt *Stmt, VectorValueMapT &VMap, std::vector &IVS, + __isl_take isl_id *IteratorID); void createIf(__isl_take isl_ast_node *If); - void createUserVector(__isl_take isl_ast_node *User, - std::vector &IVS, __isl_take isl_id *IteratorID, - __isl_take isl_union_map *Schedule); + void createUserVector( + __isl_take isl_ast_node *User, std::vector &IVS, + __isl_take isl_id *IteratorID, __isl_take isl_union_map *Schedule); void createUser(__isl_take isl_ast_node *User); void createBlock(__isl_take isl_ast_node *Block); }; __isl_give isl_ast_expr *IslNodeBuilder::getUpperBound( - __isl_keep isl_ast_node *For, ICmpInst::Predicate &Predicate) { + __isl_keep isl_ast_node *For, ICmpInst::Predicate &Predicate) { isl_id *UBID, *IteratorID; isl_ast_expr *Cond, *Iterator, *UB, *Arg0; isl_ast_op_type Type; @@ -615,14 +611,14 @@ __isl_give isl_ast_expr *IslNodeBuilder::getUpperBound( "conditional expression is not an atomic upper bound"); switch (Type) { - case isl_ast_op_le: - Predicate = ICmpInst::ICMP_SLE; - break; - case isl_ast_op_lt: - Predicate = ICmpInst::ICMP_SLT; - break; - default: - llvm_unreachable("Unexpected comparision type in loop conditon"); + case isl_ast_op_le: + Predicate = ICmpInst::ICMP_SLE; + break; + case isl_ast_op_lt: + Predicate = ICmpInst::ICMP_SLT; + break; + default: + llvm_unreachable("Unexpected comparision type in loop conditon"); } Arg0 = isl_ast_expr_get_op_arg(Cond, 0); @@ -656,7 +652,7 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { if (!Annotation) return -1; - struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation); + struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Annotation); if (!Info) { isl_id_free(Annotation); return -1; @@ -671,18 +667,17 @@ unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { return NumberOfIterations + 1; } -void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, - std::vector &IVS, - __isl_take isl_id *IteratorID, - __isl_take isl_union_map *Schedule) { +void IslNodeBuilder::createUserVector( + __isl_take isl_ast_node *User, std::vector &IVS, + __isl_take isl_id *IteratorID, __isl_take isl_union_map *Schedule) { isl_id *Annotation = isl_ast_node_get_annotation(User); assert(Annotation && "Vector user statement is not annotated"); - struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation); + struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Annotation); assert(Info && "Vector user statement annotation does not contain info"); isl_id *Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out); - ScopStmt *Stmt = (ScopStmt *) isl_id_get_user(Id); + ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); VectorValueMapT VectorMap(IVS.size()); isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain()); @@ -726,16 +721,16 @@ void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For, if (MaxType != ValueInc->getType()) ValueInc = Builder.CreateSExt(ValueInc, MaxType); - std::vector IVS(VectorWidth); + std::vector IVS(VectorWidth); IVS[0] = ValueLB; for (int i = 1; i < VectorWidth; i++) - IVS[i] = Builder.CreateAdd(IVS[i-1], ValueInc, "p_vector_iv"); + IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv"); isl_id *Annotation = isl_ast_node_get_annotation(For); assert(Annotation && "For statement is not annotated"); - struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation); + struct IslAstUser *Info = (struct IslAstUser *)isl_id_get_user(Annotation); assert(Info && "For statement annotation does not contain info"); isl_union_map *Schedule = isl_ast_build_get_schedule(Info->Context); @@ -819,8 +814,8 @@ void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) { // executed at least once, which will enable a lot of loop invariant // code motion. - IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock, - Predicate); + IV = + createLoop(ValueLB, ValueUB, ValueInc, Builder, P, AfterBlock, Predicate); IDToValue[IteratorID] = IV; create(Body); @@ -853,8 +848,8 @@ void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { Function *F = Builder.GetInsertBlock()->getParent(); LLVMContext &Context = F->getContext(); - BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), - Builder.GetInsertPoint(), P); + BasicBlock *CondBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P); CondBB->setName("polly.cond"); BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P); MergeBB->setName("polly.merge"); @@ -914,14 +909,15 @@ void IslNodeBuilder::createSubstitutions(__isl_take isl_pw_multi_aff *PMA, isl_ast_build_free(Context); } -void IslNodeBuilder::createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA, - __isl_take isl_ast_build *Context, ScopStmt *Stmt, VectorValueMapT &VMap, - std::vector &IVS, __isl_take isl_id *IteratorID) { +void IslNodeBuilder::createSubstitutionsVector( + __isl_take isl_pw_multi_aff *PMA, __isl_take isl_ast_build *Context, + ScopStmt *Stmt, VectorValueMapT &VMap, std::vector &IVS, + __isl_take isl_id *IteratorID) { int i = 0; Value *OldValue = IDToValue[IteratorID]; - for (std::vector::iterator II = IVS.begin(), IE = IVS.end(); - II != IE; ++II) { + for (std::vector::iterator II = IVS.begin(), IE = IVS.end(); + II != IE; ++II) { IDToValue[IteratorID] = *II; createSubstitutions(isl_pw_multi_aff_copy(PMA), isl_ast_build_copy(Context), Stmt, VMap[i]); @@ -943,11 +939,11 @@ void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { Annotation = isl_ast_node_get_annotation(User); assert(Annotation && "Scalar user statement is not annotated"); - Info = (struct IslAstUser *) isl_id_get_user(Annotation); + Info = (struct IslAstUser *)isl_id_get_user(Annotation); assert(Info && "Scalar user statement annotation does not contain info"); Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out); - Stmt = (ScopStmt *) isl_id_get_user(Id); + Stmt = (ScopStmt *)isl_id_get_user(Id); createSubstitutions(isl_pw_multi_aff_copy(Info->PMA), isl_ast_build_copy(Info->Context), Stmt, VMap); @@ -1000,7 +996,7 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { Instruction *InsertLocation; Id = isl_set_get_dim_id(Context, isl_dim_param, i); - Scev = (const SCEV*) isl_id_get_user(Id); + Scev = (const SCEV *)isl_id_get_user(Id); T = dyn_cast(Scev->getType()); InsertLocation = --(Builder.GetInsertBlock()->end()); Value *V = Rewriter.expandCodeFor(Scev, T, InsertLocation); @@ -1033,8 +1029,7 @@ public: return true; } - virtual void printScop(raw_ostream &OS) const { - } + virtual void printScop(raw_ostream &OS) const {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); diff --git a/polly/lib/CodeGen/LoopGenerators.cpp b/polly/lib/CodeGen/LoopGenerators.cpp index 7dd0847bad1b..6ba0e8d43755 100644 --- a/polly/lib/CodeGen/LoopGenerators.cpp +++ b/polly/lib/CodeGen/LoopGenerators.cpp @@ -73,12 +73,9 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, return IV; } -void OMPGenerator::createCallParallelLoopStart(Value *SubFunction, - Value *SubfunctionParam, - Value *NumberOfThreads, - Value *LowerBound, - Value *UpperBound, - Value *Stride) { +void OMPGenerator::createCallParallelLoopStart( + Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads, + Value *LowerBound, Value *UpperBound, Value *Stride) { Module *M = getModule(); const char *Name = "GOMP_parallel_loop_runtime_start"; Function *F = M->getFunction(Name); @@ -88,35 +85,23 @@ void OMPGenerator::createCallParallelLoopStart(Value *SubFunction, Type *LongTy = getIntPtrTy(); GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - Type *Params[] = { - PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(), - Builder.getInt8PtrTy(), - false)), - Builder.getInt8PtrTy(), - Builder.getInt32Ty(), - LongTy, - LongTy, - LongTy, - }; + Type *Params[] = { PointerType::getUnqual(FunctionType::get( + Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), + Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy, + LongTy, LongTy, }; FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); F = Function::Create(Ty, Linkage, Name, M); } - Value *Args[] = { - SubFunction, - SubfunctionParam, - NumberOfThreads, - LowerBound, - UpperBound, - Stride, - }; + Value *Args[] = { SubFunction, SubfunctionParam, NumberOfThreads, LowerBound, + UpperBound, Stride, }; Builder.CreateCall(F, Args); } -Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, - Value *UpperBoundPtr) { +Value * +OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr) { Module *M = getModule(); const char *Name = "GOMP_loop_runtime_next"; Function *F = M->getFunction(Name); @@ -126,23 +111,17 @@ Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy()); GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - Type *Params[] = { - LongPtrTy, - LongPtrTy, - }; + Type *Params[] = { LongPtrTy, LongPtrTy, }; FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); F = Function::Create(Ty, Linkage, Name, M); } - Value *Args[] = { - LowerBoundPtr, - UpperBoundPtr, - }; + Value *Args[] = { LowerBoundPtr, UpperBoundPtr, }; Value *Return = Builder.CreateCall(F, Args); - Return = Builder.CreateICmpNE(Return, Builder.CreateZExt(Builder.getFalse(), - Return->getType())); + Return = Builder.CreateICmpNE( + Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); return Return; } @@ -189,7 +168,7 @@ Module *OMPGenerator::getModule() { Function *OMPGenerator::createSubfunctionDefinition() { Module *M = getModule(); Function *F = Builder.GetInsertBlock()->getParent(); - std::vector Arguments(1, Builder.getInt8PtrTy()); + std::vector Arguments(1, Builder.getInt8PtrTy()); FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); Function *FN = Function::Create(FT, Function::InternalLinkage, F->getName() + ".omp_subfn", M); @@ -202,8 +181,8 @@ Function *OMPGenerator::createSubfunctionDefinition() { return FN; } -Value *OMPGenerator::loadValuesIntoStruct(SetVector &Values) { - std::vector Members; +Value *OMPGenerator::loadValuesIntoStruct(SetVector &Values) { + std::vector Members; for (unsigned i = 0; i < Values.size(); i++) Members.push_back(Values[i]->getType()); @@ -219,26 +198,24 @@ Value *OMPGenerator::loadValuesIntoStruct(SetVector &Values) { return Struct; } -void OMPGenerator::extractValuesFromStruct(SetVector OldValues, - Value *Struct, - ValueToValueMapTy &Map) { +void OMPGenerator::extractValuesFromStruct( + SetVector OldValues, Value *Struct, ValueToValueMapTy &Map) { for (unsigned i = 0; i < OldValues.size(); i++) { Value *Address = Builder.CreateStructGEP(Struct, i); Value *NewValue = Builder.CreateLoad(Address); - Map.insert(std::make_pair(OldValues[i], NewValue)); + Map.insert(std::make_pair(OldValues[i], NewValue)); } } -Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData, - SetVector Data, - ValueToValueMapTy &Map, - Function **SubFunction) { +Value *OMPGenerator::createSubfunction( + Value *Stride, Value *StructData, SetVector Data, + ValueToValueMapTy &Map, Function **SubFunction) { Function *FN = createSubfunctionDefinition(); BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB, - *AfterBB; + *AfterBB; Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule, - *LowerBound, *UpperBound, *IV; + *LowerBound, *UpperBound, *IV; Type *IntPtrTy = getIntPtrTy(); LLVMContext &Context = FN->getContext(); @@ -303,11 +280,10 @@ Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData, return IV; } -Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound, - Value *Stride, - SetVector &Values, - ValueToValueMapTy &Map, - BasicBlock::iterator *LoopBody) { +Value *OMPGenerator::createParallelLoop( + Value *LowerBound, Value *UpperBound, Value *Stride, + SetVector &Values, ValueToValueMapTy &Map, + BasicBlock::iterator *LoopBody) { Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads; Function *SubFunction; @@ -319,15 +295,15 @@ Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound, Builder.SetInsertPoint(PrevInsertPoint); // Create call for GOMP_parallel_loop_runtime_start. - SubfunctionParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), - "omp_data"); + SubfunctionParam = + Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data"); NumberOfThreads = Builder.getInt32(0); // Add one as the upper bound provided by openmp is a < comparison // whereas the codegenForSequential function creates a <= comparison. - UpperBound = Builder.CreateAdd(UpperBound, - ConstantInt::get(getIntPtrTy(), 1)); + UpperBound = + Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1)); createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads, LowerBound, UpperBound, Stride); diff --git a/polly/lib/CodeGen/PTXGenerator.cpp b/polly/lib/CodeGen/PTXGenerator.cpp index 4e022a6ddd9e..22e8b6c8309f 100644 --- a/polly/lib/CodeGen/PTXGenerator.cpp +++ b/polly/lib/CodeGen/PTXGenerator.cpp @@ -34,9 +34,9 @@ using namespace llvm; using namespace polly; PTXGenerator::PTXGenerator(IRBuilder<> &Builder, Pass *P, - const std::string &Triple): - Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1), - BlockWidth(1), BlockHeight(1), OutputBytes(0) { + const std::string &Triple) + : Builder(Builder), P(P), GPUTriple(Triple), GridWidth(1), GridHeight(1), + BlockWidth(1), BlockHeight(1), OutputBytes(0) { InitializeGPUDataTypes(); } @@ -50,12 +50,12 @@ Function *PTXGenerator::createSubfunctionDefinition(int NumArgs) { Module *M = getModule(); Function *F = Builder.GetInsertBlock()->getParent(); - std::vector Arguments; + std::vector Arguments; for (int i = 0; i < NumArgs; i++) Arguments.push_back(Builder.getInt8PtrTy()); FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); Function *FN = Function::Create(FT, Function::InternalLinkage, - F->getName() + "_ptx_subfn", M); + F->getName() + "_ptx_subfn", M); FN->setCallingConv(CallingConv::PTX_Kernel); // Do not run any optimization pass on the new function. @@ -67,10 +67,9 @@ Function *PTXGenerator::createSubfunctionDefinition(int NumArgs) { return FN; } -void PTXGenerator::createSubfunction(SetVector &UsedValues, - SetVector &OriginalIVS, - PTXGenerator::ValueToValueMapTy &VMap, - Function **SubFunction) { +void PTXGenerator::createSubfunction( + SetVector &UsedValues, SetVector &OriginalIVS, + PTXGenerator::ValueToValueMapTy &VMap, Function **SubFunction) { Function *FN = createSubfunctionDefinition(UsedValues.size()); Module *M = getModule(); LLVMContext &Context = FN->getContext(); @@ -98,20 +97,20 @@ void PTXGenerator::createSubfunction(SetVector &UsedValues, Value *BaseAddr = UsedValues[j]; Type *ArrayTy = BaseAddr->getType(); Value *Param = Builder.CreateBitCast(AI, ArrayTy); - VMap.insert(std::make_pair(BaseAddr, Param)); + VMap.insert(std::make_pair(BaseAddr, Param)); AI++; } // FIXME: These intrinsics should be inserted on-demand. However, we insert // them all currently for simplicity. Function *GetNctaidX = - Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_x); + Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_x); Function *GetNctaidY = - Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_y); + Intrinsic::getDeclaration(M, Intrinsic::ptx_read_nctaid_y); Function *GetCtaidX = - Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_x); + Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_x); Function *GetCtaidY = - Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_y); + Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ctaid_y); Function *GetNtidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ntid_x); Function *GetNtidY = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_ntid_y); Function *GetTidX = Intrinsic::getDeclaration(M, Intrinsic::ptx_read_tid_x); @@ -142,8 +141,8 @@ void PTXGenerator::createSubfunction(SetVector &UsedValues, Value *BlockID, *ThreadID; switch (NumDims) { case 1: { - Value *BlockSize = Builder.CreateMul(BlockWidth, BlockHeight, - "p_gpu_blocksize"); + Value *BlockSize = + Builder.CreateMul(BlockWidth, BlockHeight, "p_gpu_blocksize"); BlockID = Builder.CreateMul(BIDy, GridWidth, "p_gpu_index_i"); BlockID = Builder.CreateAdd(BlockID, BIDx); BlockID = Builder.CreateMul(BlockID, BlockSize); @@ -183,11 +182,11 @@ void PTXGenerator::createSubfunction(SetVector &UsedValues, return; } - assert(OriginalIVS.size() == Substitutions.size() - && "The size of IVS should be equal to the size of substitutions."); + assert(OriginalIVS.size() == Substitutions.size() && + "The size of IVS should be equal to the size of substitutions."); for (unsigned i = 0; i < OriginalIVS.size(); ++i) { - VMap.insert(std::make_pair(OriginalIVS[i], - Substitutions[i])); + VMap.insert( + std::make_pair(OriginalIVS[i], Substitutions[i])); } Builder.CreateBr(ExitBB); @@ -202,10 +201,9 @@ void PTXGenerator::createSubfunction(SetVector &UsedValues, *SubFunction = FN; } -void PTXGenerator::startGeneration(SetVector &UsedValues, - SetVector &OriginalIVS, - ValueToValueMapTy &VMap, - BasicBlock::iterator *LoopBody) { +void PTXGenerator::startGeneration( + SetVector &UsedValues, SetVector &OriginalIVS, + ValueToValueMapTy &VMap, BasicBlock::iterator *LoopBody) { Function *SubFunction; BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint(); createSubfunction(UsedValues, OriginalIVS, VMap, &SubFunction); @@ -213,9 +211,7 @@ void PTXGenerator::startGeneration(SetVector &UsedValues, Builder.SetInsertPoint(PrevInsertPoint); } -IntegerType *PTXGenerator::getInt64Type() { - return Builder.getInt64Ty(); -} +IntegerType *PTXGenerator::getInt64Type() { return Builder.getInt64Ty(); } PointerType *PTXGenerator::getI8PtrType() { return PointerType::getUnqual(Builder.getInt8Ty()); @@ -260,7 +256,7 @@ void PTXGenerator::InitializeGPUDataTypes() { ModuleTy = StructType::create(Context, "struct.PollyGPUModuleT"); KernelTy = StructType::create(Context, "struct.PollyGPUFunctionT"); DeviceTy = StructType::create(Context, "struct.PollyGPUDeviceT"); - DevDataTy = StructType::create(Context,"struct.PollyGPUDevicePtrT"); + DevDataTy = StructType::create(Context, "struct.PollyGPUDevicePtrT"); EventTy = StructType::create(Context, "struct.PollyGPUEventT"); } @@ -272,7 +268,7 @@ void PTXGenerator::createCallInitDevice(Value *Context, Value *Device) { // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(PointerType::getUnqual(getGPUContextPtrType())); Args.push_back(PointerType::getUnqual(getGPUDevicePtrType())); FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); @@ -290,7 +286,7 @@ void PTXGenerator::createCallGetPTXModule(Value *Buffer, Value *Module) { // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getI8PtrType()); Args.push_back(PointerType::getUnqual(getGPUModulePtrType())); FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); @@ -309,7 +305,7 @@ void PTXGenerator::createCallGetPTXKernelEntry(Value *Entry, Value *Module, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getI8PtrType()); Args.push_back(getGPUModulePtrType()); Args.push_back(PointerType::getUnqual(getGPUFunctionPtrType())); @@ -320,9 +316,8 @@ void PTXGenerator::createCallGetPTXKernelEntry(Value *Entry, Value *Module, Builder.CreateCall3(F, Entry, Module, Kernel); } -void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData, - Value *DeviceData, - Value *Size) { +void PTXGenerator::createCallAllocateMemoryForHostAndDevice( + Value *HostData, Value *DeviceData, Value *Size) { const char *Name = "polly_allocateMemoryForHostAndDevice"; Module *M = getModule(); Function *F = M->getFunction(Name); @@ -330,7 +325,7 @@ void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getPtrI8PtrType()); Args.push_back(PointerType::getUnqual(getPtrGPUDevicePtrType())); Args.push_back(getInt64Type()); @@ -341,9 +336,8 @@ void PTXGenerator::createCallAllocateMemoryForHostAndDevice(Value *HostData, Builder.CreateCall3(F, HostData, DeviceData, Size); } -void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData, - Value *HostData, - Value *Size) { +void PTXGenerator::createCallCopyFromHostToDevice( + Value *DeviceData, Value *HostData, Value *Size) { const char *Name = "polly_copyFromHostToDevice"; Module *M = getModule(); Function *F = M->getFunction(Name); @@ -351,7 +345,7 @@ void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getPtrGPUDevicePtrType()); Args.push_back(getI8PtrType()); Args.push_back(getInt64Type()); @@ -362,9 +356,8 @@ void PTXGenerator::createCallCopyFromHostToDevice(Value *DeviceData, Builder.CreateCall3(F, DeviceData, HostData, Size); } -void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData, - Value *DeviceData, - Value *Size) { +void PTXGenerator::createCallCopyFromDeviceToHost( + Value *HostData, Value *DeviceData, Value *Size) { const char *Name = "polly_copyFromDeviceToHost"; Module *M = getModule(); Function *F = M->getFunction(Name); @@ -372,7 +365,7 @@ void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getI8PtrType()); Args.push_back(getPtrGPUDevicePtrType()); Args.push_back(getInt64Type()); @@ -383,10 +376,8 @@ void PTXGenerator::createCallCopyFromDeviceToHost(Value *HostData, Builder.CreateCall3(F, HostData, DeviceData, Size); } -void PTXGenerator::createCallSetKernelParameters(Value *Kernel, - Value *BlockWidth, - Value *BlockHeight, - Value *DeviceData) { +void PTXGenerator::createCallSetKernelParameters( + Value *Kernel, Value *BlockWidth, Value *BlockHeight, Value *DeviceData) { const char *Name = "polly_setKernelParameters"; Module *M = getModule(); Function *F = M->getFunction(Name); @@ -394,7 +385,7 @@ void PTXGenerator::createCallSetKernelParameters(Value *Kernel, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getGPUFunctionPtrType()); Args.push_back(getInt64Type()); Args.push_back(getInt64Type()); @@ -415,7 +406,7 @@ void PTXGenerator::createCallLaunchKernel(Value *Kernel, Value *GridWidth, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getGPUFunctionPtrType()); Args.push_back(getInt64Type()); Args.push_back(getInt64Type()); @@ -435,7 +426,7 @@ void PTXGenerator::createCallStartTimerByCudaEvent(Value *StartEvent, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(PointerType::getUnqual(getGPUEventPtrType())); Args.push_back(PointerType::getUnqual(getGPUEventPtrType())); FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); @@ -445,9 +436,8 @@ void PTXGenerator::createCallStartTimerByCudaEvent(Value *StartEvent, Builder.CreateCall2(F, StartEvent, StopEvent); } -void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent, - Value *StopEvent, - Value *Timer) { +void PTXGenerator::createCallStopTimerByCudaEvent( + Value *StartEvent, Value *StopEvent, Value *Timer) { const char *Name = "polly_stopTimerByCudaEvent"; Module *M = getModule(); Function *F = M->getFunction(Name); @@ -455,7 +445,7 @@ void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getGPUEventPtrType()); Args.push_back(getGPUEventPtrType()); Args.push_back(getFloatPtrType()); @@ -466,11 +456,9 @@ void PTXGenerator::createCallStopTimerByCudaEvent(Value *StartEvent, Builder.CreateCall3(F, StartEvent, StopEvent, Timer); } -void PTXGenerator::createCallCleanupGPGPUResources(Value *HostData, - Value *DeviceData, - Value *Module, - Value *Context, - Value *Kernel) { +void PTXGenerator::createCallCleanupGPGPUResources( + Value *HostData, Value *DeviceData, Value *Module, Value *Context, + Value *Kernel) { const char *Name = "polly_cleanupGPGPUResources"; llvm::Module *M = getModule(); Function *F = M->getFunction(Name); @@ -478,7 +466,7 @@ void PTXGenerator::createCallCleanupGPGPUResources(Value *HostData, // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - std::vector Args; + std::vector Args; Args.push_back(getI8PtrType()); Args.push_back(getPtrGPUDevicePtrType()); Args.push_back(getGPUModulePtrType()); @@ -516,11 +504,11 @@ Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) { std::string LLVMKernelStr; raw_string_ostream NameROS(LLVMKernelStr); formatted_raw_ostream FOS(NameROS); - FOS << "target triple = \"" << GPUTriple <<"\"\n"; + FOS << "target triple = \"" << GPUTriple << "\"\n"; SubFunction->print(FOS); // Insert ptx intrinsics into the kernel string. - for (Module::iterator I = M->begin(), E = M->end(); I != E; ) { + for (Module::iterator I = M->begin(), E = M->end(); I != E;) { Function *F = I++; // Function must be a prototype and unused. if (F->isDeclaration() && F->isIntrinsic()) { @@ -541,13 +529,12 @@ Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) { } } - Value *LLVMKernel = Builder.CreateGlobalStringPtr(LLVMKernelStr, - "llvm_kernel"); + Value *LLVMKernel = + Builder.CreateGlobalStringPtr(LLVMKernelStr, "llvm_kernel"); Value *MCPU = Builder.CreateGlobalStringPtr("sm_10", "mcpu"); Value *Features = Builder.CreateGlobalStringPtr("", "cpu_features"); - Function *GetDeviceKernel = Intrinsic::getDeclaration(M, - Intrinsic::codegen); + Function *GetDeviceKernel = Intrinsic::getDeclaration(M, Intrinsic::codegen); return Builder.CreateCall3(GetDeviceKernel, LLVMKernel, MCPU, Features); } @@ -596,22 +583,22 @@ void PTXGenerator::eraseUnusedFunctions(Function *SubFunction) { void PTXGenerator::finishGeneration(Function *F) { // Define data used by the GPURuntime library. - AllocaInst *PtrCUContext = Builder.CreateAlloca(getGPUContextPtrType(), 0, - "phcontext"); - AllocaInst *PtrCUDevice = Builder.CreateAlloca(getGPUDevicePtrType(), 0, - "phdevice"); - AllocaInst *PtrCUModule = Builder.CreateAlloca(getGPUModulePtrType(), 0, - "phmodule"); - AllocaInst *PtrCUKernel = Builder.CreateAlloca(getGPUFunctionPtrType(), 0, - "phkernel"); - AllocaInst *PtrCUStartEvent = Builder.CreateAlloca(getGPUEventPtrType(), 0, - "pstart_timer"); - AllocaInst *PtrCUStopEvent = Builder.CreateAlloca(getGPUEventPtrType(), 0, - "pstop_timer"); - AllocaInst *PtrDevData = Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0, - "pdevice_data"); - AllocaInst *PtrHostData = Builder.CreateAlloca(getI8PtrType(), 0, - "phost_data"); + AllocaInst *PtrCUContext = + Builder.CreateAlloca(getGPUContextPtrType(), 0, "phcontext"); + AllocaInst *PtrCUDevice = + Builder.CreateAlloca(getGPUDevicePtrType(), 0, "phdevice"); + AllocaInst *PtrCUModule = + Builder.CreateAlloca(getGPUModulePtrType(), 0, "phmodule"); + AllocaInst *PtrCUKernel = + Builder.CreateAlloca(getGPUFunctionPtrType(), 0, "phkernel"); + AllocaInst *PtrCUStartEvent = + Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstart_timer"); + AllocaInst *PtrCUStopEvent = + Builder.CreateAlloca(getGPUEventPtrType(), 0, "pstop_timer"); + AllocaInst *PtrDevData = + Builder.CreateAlloca(getPtrGPUDevicePtrType(), 0, "pdevice_data"); + AllocaInst *PtrHostData = + Builder.CreateAlloca(getI8PtrType(), 0, "phost_data"); Type *FloatTy = llvm::Type::getFloatTy(getModule()->getContext()); AllocaInst *PtrElapsedTimes = Builder.CreateAlloca(FloatTy, 0, "ptimer"); @@ -648,13 +635,11 @@ void PTXGenerator::finishGeneration(Function *F) { // Record the end time. LoadInst *CUStartEvent = Builder.CreateLoad(PtrCUStartEvent, "start_timer"); LoadInst *CUStopEvent = Builder.CreateLoad(PtrCUStopEvent, "stop_timer"); - createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent, - PtrElapsedTimes); + createCallStopTimerByCudaEvent(CUStartEvent, CUStopEvent, PtrElapsedTimes); // Cleanup all the resources used. LoadInst *CUContext = Builder.CreateLoad(PtrCUContext, "cucontext"); - createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext, - CUKernel); + createCallCleanupGPGPUResources(HData, DData, CUModule, CUContext, CUKernel); // Erase the ptx kernel and device subfunctions and ptx intrinsics from // current module.