From f74a4cd3dd676c58f9a61f20319048b615c41570 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Fri, 23 Mar 2012 10:35:18 +0000 Subject: [PATCH] CodeGen: Extract the LLVM-IR generaction of scalar and OpenMP loops. We create a new file LoopGenerators that provides utility classes for the generation of OpenMP parallel and scalar loops. This means we move a lot of the OpenMP generation out of the Polly specific code generator. llvm-svn: 153325 --- polly/include/polly/LoopGenerators.h | 108 ++++++++ polly/lib/CMakeLists.txt | 1 + polly/lib/CodeGeneration.cpp | 373 +++++---------------------- polly/lib/LoopGenerators.cpp | 335 ++++++++++++++++++++++++ 4 files changed, 506 insertions(+), 311 deletions(-) create mode 100644 polly/include/polly/LoopGenerators.h create mode 100644 polly/lib/LoopGenerators.cpp diff --git a/polly/include/polly/LoopGenerators.h b/polly/include/polly/LoopGenerators.h new file mode 100644 index 000000000000..d55d55cf3577 --- /dev/null +++ b/polly/include/polly/LoopGenerators.h @@ -0,0 +1,108 @@ +//===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions to create scalar and OpenMP parallel loops +// as LLVM-IR. +// +//===----------------------------------------------------------------------===// +#include "llvm/Support/IRBuilder.h" +#include "llvm/ADT/SetVector.h" + +#include + +namespace llvm { + class Value; + class Pass; + class BasicBlock; +} + +using namespace llvm; + +/// @brief Create a scalar loop. +/// +/// @param LowerBound The starting value of the induction variable. +/// @param UpperBound The upper bound of the induction variable. +/// @param Stride The value by which the induction variable is incremented. +/// +/// @param Builder The builder used to create the loop. +/// @param P A pointer to the pass that uses this function. It is used +/// to update analysis information. +Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride, + IRBuilder<> *Builder, Pass *P, BasicBlock **AfterBlock); + +class OMPGenerator { +public: + typedef std::map ValueToValueMapTy; + + OMPGenerator(IRBuilder<> &Builder, Pass *P): Builder(Builder), P(P) {} + + /// @brief Create an OpenMP parallel loop. + /// + /// + /// @param LowerBound The starting value of the induction variable. + /// @param UpperBound The upper bound of the induction variable. + /// @param Stride The value by which the induction variable is + /// incremented. + /// + /// @param UsedValues A set of LLVM-IR Values that should be available to + /// the new loop body. + /// @param VMap This map is filled by createParallelLoop(). It + /// maps the values in UsedValues to Values through which + /// their content is available within the loop body. + /// @param LoopBody A pointer to an iterator that is set to point to the + /// body of the created loop. It should be used to insert + /// instructions that form the actual loop body. + /// + /// @return Value* The newly created induction variable for this loop. + Value *createParallelLoop(Value *LowerBound, Value *UpperBound, Value *Stride, + SetVector &UsedValues, + ValueToValueMapTy &VMap, + BasicBlock::iterator *LoopBody); + +private: + IRBuilder<> &Builder; + Pass *P; + + IntegerType *getIntPtrTy(); + Module *getModule(); + + void createCallParallelLoopStart(Value *SubFunction, Value *SubfunctionParam, + Value *NumberOfThreads, Value *LowerBound, + Value *UpperBound, Value *Stride); + Value *createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr); + void createCallParallelEnd(); + void createCallLoopEndNowait(); + + Value *loadValuesIntoStruct(SetVector &Values); + void extractValuesFromStruct(SetVector OldValues, + Value *Struct, ValueToValueMapTy &Map); + + /// @brief Create the OpenMP subfunction. + /// + /// @param Stride The value by which the induction variable is + /// incremented. + /// @param Struct The structure that is used to make Values available to + /// the loop body. + /// @param UsedValues A set of LLVM-IR Values that should be available to + /// the new loop body. + /// @param VMap This map that is filled by createSubfunction(). It + /// maps the values in UsedValues to Values through which + /// their content is available within the loop body. + /// @param SubFunction The newly created SubFunction is returned here. + /// + /// @return Value* The newly created induction variable. + Value *createSubfunction(Value *Stride, Value *Struct, + SetVector UsedValues, + ValueToValueMapTy &VMap, + Function **SubFunction); + + /// @brief Create the definition of the OpenMP subfunction. + Function *createSubfunctionDefinition(); +}; + diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt index 340ab76a96c3..7db1fae6fb53 100755 --- a/polly/lib/CMakeLists.txt +++ b/polly/lib/CMakeLists.txt @@ -25,6 +25,7 @@ add_polly_library(LLVMPolly IndependentBlocks.cpp IndVarSimplify.cpp MayAliasSet.cpp + LoopGenerators.cpp Pocc.cpp RegionSimplify.cpp RegisterPasses.cpp diff --git a/polly/lib/CodeGeneration.cpp b/polly/lib/CodeGeneration.cpp index 2bba6e6ad112..5f42c748c5aa 100644 --- a/polly/lib/CodeGeneration.cpp +++ b/polly/lib/CodeGeneration.cpp @@ -29,6 +29,7 @@ #include "polly/ScopInfo.h" #include "polly/TempScopInfo.h" #include "polly/Support/GICHelper.h" +#include "polly/LoopGenerators.h" #include "llvm/Module.h" #include "llvm/ADT/SetVector.h" @@ -86,70 +87,6 @@ typedef DenseMap ValueMapT; typedef DenseMap CharMapT; typedef std::vector VectorValueMapT; -// Create a new loop. -// -// @param Builder The builder used to create the loop. It also defines the -// place where to create the loop. -// @param UB The upper bound of the loop iv. -// @param Stride The number by which the loop iv is incremented after every -// iteration. -static Value *createLoop(IRBuilder<> *Builder, Value *LB, Value *UB, - APInt Stride, Pass *P, BasicBlock **AfterBlock) { - DominatorTree &DT = P->getAnalysis(); - Function *F = Builder->GetInsertBlock()->getParent(); - LLVMContext &Context = F->getContext(); - - BasicBlock *PreheaderBB = Builder->GetInsertBlock(); - BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); - BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F); - BasicBlock *AfterBB = SplitBlock(PreheaderBB, Builder->GetInsertPoint()++, P); - AfterBB->setName("polly.loop_after"); - - PreheaderBB->getTerminator()->setSuccessor(0, HeaderBB); - DT.addNewBlock(HeaderBB, PreheaderBB); - - Builder->SetInsertPoint(HeaderBB); - - // Use the type of upper and lower bound. - assert(LB->getType() == UB->getType() - && "Different types for upper and lower bound."); - - IntegerType *LoopIVType = dyn_cast(UB->getType()); - assert(LoopIVType && "UB is not integer?"); - - // IV - PHINode *IV = Builder->CreatePHI(LoopIVType, 2, "polly.loopiv"); - IV->addIncoming(LB, PreheaderBB); - - // IV increment. - Value *StrideValue = ConstantInt::get(LoopIVType, - Stride.zext(LoopIVType->getBitWidth())); - Value *IncrementedIV = Builder->CreateAdd(IV, StrideValue, - "polly.next_loopiv"); - - // Exit condition. - Value *CMP; - if (AtLeastOnce) { // At least on iteration. - UB = Builder->CreateAdd(UB, Builder->getInt64(1)); - CMP = Builder->CreateICmpNE(IV, UB); - } else { // Maybe not executed at all. - CMP = Builder->CreateICmpSLE(IV, UB); - } - - Builder->CreateCondBr(CMP, BodyBB, AfterBB); - DT.addNewBlock(BodyBB, HeaderBB); - - Builder->SetInsertPoint(BodyBB); - Builder->CreateBr(HeaderBB); - IV->addIncoming(IncrementedIV, BodyBB); - DT.changeImmediateDominator(AfterBB, HeaderBB); - - Builder->SetInsertPoint(BodyBB->begin()); - *AfterBlock = AfterBB; - - return IV; -} - class IslGenerator; class IslGenerator { @@ -1036,6 +973,10 @@ void ClastExpCodeGen::setIVS(CharMapT *IVSNew) { } class ClastStmtCodeGen { +public: + const std::vector &getParallelLoops(); + +private: // The Scop we code generate. Scop *S; Pass *P; @@ -1066,11 +1007,6 @@ class ClastStmtCodeGen { std::vector parallelLoops; -public: - - const std::vector &getParallelLoops(); - - protected: void codegen(const clast_assignment *a); void codegen(const clast_assignment *a, ScopStmt *Statement, @@ -1087,36 +1023,16 @@ public: void codegen(const clast_block *b); /// @brief Create a classical sequential loop. - void codegenForSequential(const clast_for *f, Value *LowerBound = 0, - Value *UpperBound = 0); - - /// @brief Add a new definition of an openmp subfunction. - Function *addOpenMPSubfunction(Module *M); - - /// @brief Add values to the OpenMP structure. - /// - /// Create the subfunction structure and add the values from the list. - Value *addValuesToOpenMPStruct(SetVector OMPDataVals, - Function *SubFunction); + void codegenForSequential(const clast_for *f); /// @brief Create OpenMP structure values. /// - /// Create a list of values that has to be stored into the subfuncition + /// Create a list of values that has to be stored into the OpenMP subfuncition /// structure. - SetVector createOpenMPStructValues(); + SetVector getOMPValues(); - /// @brief Extract the values from the subfunction parameter. - /// - /// Extract the values from the subfunction parameter and update the clast - /// variables to point to the new values. - void extractValuesFromOpenMPStruct(CharMapT *clastVarsOMP, - SetVector OMPDataVals, - Value *userContext); - - /// @brief Add body to the subfunction. - void addOpenMPSubfunctionBody(Function *FN, const clast_for *f, - Value *structData, - SetVector OMPDataVals); + void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, + CharMapT &ClastVarsNew); /// @brief Create an OpenMP parallel for loop. /// @@ -1232,24 +1148,16 @@ void ClastStmtCodeGen::codegen(const clast_block *b) { codegen(b->body); } -void ClastStmtCodeGen::codegenForSequential(const clast_for *f, - Value *LowerBound, - Value *UpperBound) { +void ClastStmtCodeGen::codegenForSequential(const clast_for *f) { + Value *LowerBound, *UpperBound, *IV, *Stride; BasicBlock *AfterBB; Type *IntPtrTy = getIntPtrTy(); - APInt Stride = APInt_from_MPZ(f->stride); - // The value of lowerbound and upperbound will be supplied, if this - // function is called while generating OpenMP code. Otherwise get - // the values. - assert(!!LowerBound == !!UpperBound && "Either give both bounds or none"); + LowerBound = ExpGen.codegen(f->LB, IntPtrTy); + UpperBound = ExpGen.codegen(f->UB, IntPtrTy); + Stride = Builder.getInt(APInt_from_MPZ(f->stride)); - if (LowerBound == 0) { - LowerBound = ExpGen.codegen(f->LB, IntPtrTy); - UpperBound = ExpGen.codegen(f->UB, IntPtrTy); - } - - Value *IV = createLoop(&Builder, LowerBound, UpperBound, Stride, P, &AfterBB); + IV = createLoop(LowerBound, UpperBound, Stride, &Builder, P, &AfterBB); // Add loop iv to symbols. (*ClastVars)[f->iterator] = IV; @@ -1262,193 +1170,87 @@ void ClastStmtCodeGen::codegenForSequential(const clast_for *f, Builder.SetInsertPoint(AfterBB->begin()); } -Function *ClastStmtCodeGen::addOpenMPSubfunction(Module *M) { - Function *F = Builder.GetInsertBlock()->getParent(); - std::vector Arguments(1, Builder.getInt8PtrTy()); - FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); - Function *FN = Function::Create(FT, Function::InternalLinkage, - F->getName() + ".omp_subfn", M); - // Do not run any polly pass on the new function. - P->getAnalysis().markFunctionAsInvalid(FN); +SetVector ClastStmtCodeGen::getOMPValues() { + SetVector Values; - Function::arg_iterator AI = FN->arg_begin(); - AI->setName("omp.userContext"); - - return FN; -} - -Value *ClastStmtCodeGen::addValuesToOpenMPStruct(SetVector OMPDataVals, - Function *SubFunction) { - std::vector structMembers; - - // Create the structure. - for (unsigned i = 0; i < OMPDataVals.size(); i++) - structMembers.push_back(OMPDataVals[i]->getType()); - - StructType *structTy = StructType::get(Builder.getContext(), - structMembers); - // Store the values into the structure. - Value *structData = Builder.CreateAlloca(structTy, 0, "omp.userContext"); - for (unsigned i = 0; i < OMPDataVals.size(); i++) { - Value *storeAddr = Builder.CreateStructGEP(structData, i); - Builder.CreateStore(OMPDataVals[i], storeAddr); - } - - return structData; -} - -SetVector ClastStmtCodeGen::createOpenMPStructValues() { - SetVector OMPDataVals; - - // Push the clast variables available in the clastVars. + // The clast variables for (CharMapT::iterator I = ClastVars->begin(), E = ClastVars->end(); I != E; I++) - OMPDataVals.insert(I->second); + Values.insert(I->second); - // Push the base addresses of memory references. + // The memory reference base addresses for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; for (SmallVector::iterator I = Stmt->memacc_begin(), E = Stmt->memacc_end(); I != E; ++I) { Value *BaseAddr = const_cast((*I)->getBaseAddr()); - OMPDataVals.insert((BaseAddr)); + Values.insert((BaseAddr)); } } - return OMPDataVals; + return Values; } -void ClastStmtCodeGen::extractValuesFromOpenMPStruct(CharMapT *clastVarsOMP, - SetVector OMPDataVals, Value *userContext) { - // Extract the clast variables. - unsigned i = 0; +void ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, + CharMapT &ClastVarsNew) { + std::set Inserted; + for (CharMapT::iterator I = ClastVars->begin(), E = ClastVars->end(); I != E; I++) { - Value *loadAddr = Builder.CreateStructGEP(userContext, i); - (*clastVarsOMP)[I->first] = Builder.CreateLoad(loadAddr); - i++; + ClastVarsNew[I->first] = VMap[I->second]; + Inserted.insert(I->second); } - // Extract the base addresses of memory references. - for (unsigned j = i; j < OMPDataVals.size(); j++) { - Value *loadAddr = Builder.CreateStructGEP(userContext, j); - Value *baseAddr = OMPDataVals[j]; - ValueMap[baseAddr] = Builder.CreateLoad(loadAddr); + for (std::map::iterator I = VMap.begin(), E = VMap.end(); + I != E; ++I) { + if (Inserted.count(I->first)) + continue; + + ValueMap[I->first] = I->second; } } -void ClastStmtCodeGen::addOpenMPSubfunctionBody(Function *FN, - const clast_for *f, - Value *StructData, - SetVector OMPDataVals) { - Type *IntPtrTy = getIntPtrTy(); - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - LLVMContext &Context = FN->getContext(); +void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { + Value *Stride, *LowerBound, *UpperBound, *IV; + BasicBlock::iterator LoopBody; + IntegerType *IntPtrTy = getIntPtrTy(); + SetVector Values; + OMPGenerator::ValueToValueMapTy VMap; + OMPGenerator OMPGen(Builder, P); - // Store the previous basic block. - BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint(); - BasicBlock *PrevBB = Builder.GetInsertBlock(); - // Create basic blocks. - BasicBlock *HeaderBB = BasicBlock::Create(Context, "omp.setup", FN); - BasicBlock *ExitBB = BasicBlock::Create(Context, "omp.exit", FN); - BasicBlock *CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN); - BasicBlock *LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", - FN); + Stride = Builder.getInt(APInt_from_MPZ(For->stride)); + Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy); + LowerBound = ExpGen.codegen(For->LB, IntPtrTy); + UpperBound = ExpGen.codegen(For->UB, IntPtrTy); - DominatorTree &DT = P->getAnalysis(); - DT.addNewBlock(HeaderBB, PrevBB); - DT.addNewBlock(ExitBB, HeaderBB); - DT.addNewBlock(CheckNextBB, HeaderBB); - DT.addNewBlock(LoadIVBoundsBB, HeaderBB); + Values = getOMPValues(); - // Fill up basic block HeaderBB. - Builder.SetInsertPoint(HeaderBB); - Value *LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr"); - Value *UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr"); - Value *UserContext = Builder.CreateBitCast(FN->arg_begin(), - StructData->getType(), - "omp.userContext"); - - CharMapT ClastVarsOMP; - extractValuesFromOpenMPStruct(&ClastVarsOMP, OMPDataVals, UserContext); - - Builder.CreateBr(CheckNextBB); - - // Add code to check if another set of iterations will be executed. - Builder.SetInsertPoint(CheckNextBB); - Function *RuntimeNextFunction = M->getFunction("GOMP_loop_runtime_next"); - Value *Ret1 = Builder.CreateCall2(RuntimeNextFunction, - LowerBoundPtr, UpperBoundPtr); - Value *HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), - "omp.hasNextScheduleBlock"); - Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB); - - // Add code to to load the iv bounds for this set of iterations. - Builder.SetInsertPoint(LoadIVBoundsBB); - Value *LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound"); - Value *UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound"); - - // Subtract one as the upper bound provided by openmp is a < comparison - // whereas the codegenForSequential function creates a <= comparison. - UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1), - "omp.upperBoundAdjusted"); + IV = OMPGen.createParallelLoop(LowerBound, UpperBound, Stride, Values, VMap, + &LoopBody); + BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); + Builder.SetInsertPoint(LoopBody); // Use clastVarsOMP during code generation of the OpenMP subfunction. + CharMapT ClastVarsOMP; + updateWithValueMap(VMap, ClastVarsOMP); CharMapT *OldClastVars = ClastVars; ClastVars = &ClastVarsOMP; ExpGen.setIVS(&ClastVarsOMP); - Builder.CreateBr(CheckNextBB); - Builder.SetInsertPoint(--Builder.GetInsertPoint()); - codegenForSequential(f, LowerBound, UpperBound); + // Add loop iv to symbols. + (*ClastVars)[For->iterator] = IV; + + if (For->body) + codegen(For->body); + + // Loop is finished, so remove its iv from the live symbols. + ClastVars->erase(For->iterator); // Restore the old clastVars. ClastVars = OldClastVars; ExpGen.setIVS(OldClastVars); - - // Add code to terminate this openmp subfunction. - Builder.SetInsertPoint(ExitBB); - Function *EndnowaitFunction = M->getFunction("GOMP_loop_end_nowait"); - Builder.CreateCall(EndnowaitFunction); - Builder.CreateRetVoid(); - - // Restore the previous insert point. - Builder.SetInsertPoint(PrevInsertPoint); -} - -void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - IntegerType *IntPtrTy = getIntPtrTy(); - - Function *SubFunction = addOpenMPSubfunction(M); - SetVector OMPDataVals = createOpenMPStructValues(); - Value *StructData = addValuesToOpenMPStruct(OMPDataVals, SubFunction); - - addOpenMPSubfunctionBody(SubFunction, For, StructData, OMPDataVals); - - // Create call for GOMP_parallel_loop_runtime_start. - Value *SubfunctionParam = Builder.CreateBitCast(StructData, - Builder.getInt8PtrTy(), - "omp_data"); - - Value *NumberOfThreads = Builder.getInt32(0); - Value *LowerBound = ExpGen.codegen(For->LB, IntPtrTy); - Value *UpperBound = ExpGen.codegen(For->UB, IntPtrTy); - - // Add one as the upper bound provided by openmp is a < comparison - // whereas the codegenForSequential function creates a <= comparison. - UpperBound = Builder.CreateAdd(UpperBound, ConstantInt::get(IntPtrTy, 1)); - APInt APStride = APInt_from_MPZ(For->stride); - Value *Stride = ConstantInt::get(IntPtrTy, - APStride.zext(IntPtrTy->getBitWidth())); - - Value *Arguments[] = { SubFunction, SubfunctionParam, NumberOfThreads, - LowerBound, UpperBound, Stride}; - Builder.CreateCall(M->getFunction("GOMP_parallel_loop_runtime_start"), - Arguments); - Builder.CreateCall(SubFunction, SubfunctionParam); - Builder.CreateCall(M->getFunction("GOMP_parallel_end")); + Builder.SetInsertPoint(AfterLoop); } bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) { @@ -1679,53 +1481,6 @@ class CodeGeneration : public ScopPass { CodeGeneration() : ScopPass(ID) {} - // Add the declarations needed by the OpenMP function calls that we insert in - // OpenMP mode. - void addOpenMPDeclarations(Module *M) - { - IRBuilder<> Builder(M->getContext()); - Type *LongTy = getAnalysis().getIntPtrType(M->getContext()); - - llvm::GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; - - if (!M->getFunction("GOMP_parallel_end")) { - FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); - Function::Create(Ty, Linkage, "GOMP_parallel_end", M); - } - - if (!M->getFunction("GOMP_parallel_loop_runtime_start")) { - Type *Params[] = { - PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(), - Builder.getInt8PtrTy(), - false)), - Builder.getInt8PtrTy(), - Builder.getInt32Ty(), - LongTy, - LongTy, - LongTy, - }; - - FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); - Function::Create(Ty, Linkage, "GOMP_parallel_loop_runtime_start", M); - } - - if (!M->getFunction("GOMP_loop_runtime_next")) { - PointerType *LongPtrTy = PointerType::getUnqual(LongTy); - Type *Params[] = { - LongPtrTy, - LongPtrTy, - }; - - FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); - Function::Create(Ty, Linkage, "GOMP_loop_runtime_next", M); - } - - if (!M->getFunction("GOMP_loop_end_nowait")) { - FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); - Function::Create(Ty, Linkage, "GOMP_loop_end_nowait", M); - } - } - // Split the entry edge of the region and generate a new basic block on this // edge. This function also updates ScopInfo and RegionInfo. // @@ -1820,10 +1575,6 @@ class CodeGeneration : public ScopPass { assert(region->isSimple() && "Only simple regions are supported"); - Module *M = region->getEntry()->getParent()->getParent(); - - if (OpenMP) addOpenMPDeclarations(M); - // In the CFG the optimized code of the SCoP is generated next to the // original code. Both the new and the original version of the code remain // in the CFG. A branch statement decides which version is executed. diff --git a/polly/lib/LoopGenerators.cpp b/polly/lib/LoopGenerators.cpp new file mode 100644 index 000000000000..54c188965355 --- /dev/null +++ b/polly/lib/LoopGenerators.cpp @@ -0,0 +1,335 @@ +//===------ LoopGenerators.cpp - IR helper to create loops ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions to create scalar and OpenMP parallel loops +// as LLVM-IR. +// +//===----------------------------------------------------------------------===// + +#include "polly/LoopGenerators.h" +#include "polly/ScopDetection.h" + +#include "llvm/Module.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +Value *createLoop(Value *LB, Value *UB, Value *Stride, + IRBuilder<> *Builder, Pass *P, BasicBlock **AfterBlock) { + DominatorTree &DT = P->getAnalysis(); + Function *F = Builder->GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + + BasicBlock *PreheaderBB = Builder->GetInsertBlock(); + BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); + BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F); + BasicBlock *AfterBB = SplitBlock(PreheaderBB, Builder->GetInsertPoint()++, P); + AfterBB->setName("polly.loop_after"); + + PreheaderBB->getTerminator()->setSuccessor(0, HeaderBB); + DT.addNewBlock(HeaderBB, PreheaderBB); + + Builder->SetInsertPoint(HeaderBB); + + // Use the type of upper and lower bound. + assert(LB->getType() == UB->getType() + && "Different types for upper and lower bound."); + + IntegerType *LoopIVType = dyn_cast(UB->getType()); + assert(LoopIVType && "UB is not integer?"); + + // IV + PHINode *IV = Builder->CreatePHI(LoopIVType, 2, "polly.loopiv"); + IV->addIncoming(LB, PreheaderBB); + + Stride = Builder->CreateZExtOrBitCast(Stride, LoopIVType); + Value *IncrementedIV = Builder->CreateAdd(IV, Stride, "polly.next_loopiv"); + + // Exit condition. + Value *CMP; + CMP = Builder->CreateICmpSLE(IV, UB); + + Builder->CreateCondBr(CMP, BodyBB, AfterBB); + DT.addNewBlock(BodyBB, HeaderBB); + + Builder->SetInsertPoint(BodyBB); + Builder->CreateBr(HeaderBB); + IV->addIncoming(IncrementedIV, BodyBB); + DT.changeImmediateDominator(AfterBB, HeaderBB); + + Builder->SetInsertPoint(BodyBB->begin()); + *AfterBlock = AfterBB; + + return IV; +} + +void OMPGenerator::createCallParallelLoopStart(Value *SubFunction, + Value *SubfunctionParam, + Value *NumberOfThreads, + Value *LowerBound, + Value *UpperBound, + Value *Stride) { + Module *M = getModule(); + const char *Name = "GOMP_parallel_loop_runtime_start"; + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + Type *LongTy = getIntPtrTy(); + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + + Type *Params[] = { + PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(), + Builder.getInt8PtrTy(), + false)), + Builder.getInt8PtrTy(), + Builder.getInt32Ty(), + LongTy, + LongTy, + LongTy, + }; + + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Value *Args[] = { + SubFunction, + SubfunctionParam, + NumberOfThreads, + LowerBound, + UpperBound, + Stride, + }; + + Builder.CreateCall(F, Args); +} + +Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, + Value *UpperBoundPtr) { + Module *M = getModule(); + const char *Name = "GOMP_loop_runtime_next"; + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy()); + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + + Type *Params[] = { + LongPtrTy, + LongPtrTy, + }; + + FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Value *Args[] = { + LowerBoundPtr, + UpperBoundPtr, + }; + + Value *Return = Builder.CreateCall(F, Args); + Return = Builder.CreateICmpNE(Return, Builder.CreateZExt(Builder.getFalse(), + Return->getType())); + return Return; +} + +void OMPGenerator::createCallParallelEnd() { + const char *Name = "GOMP_parallel_end"; + Module *M = getModule(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F); +} + +void OMPGenerator::createCallLoopEndNowait() { + const char *Name = "GOMP_loop_end_nowait"; + Module *M = getModule(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F); +} + +IntegerType *OMPGenerator::getIntPtrTy() { + return P->getAnalysis().getIntPtrType(Builder.getContext()); +} + +Module *OMPGenerator::getModule() { + return Builder.GetInsertBlock()->getParent()->getParent(); +} + +Function *OMPGenerator::createSubfunctionDefinition() { + Module *M = getModule(); + Function *F = Builder.GetInsertBlock()->getParent(); + std::vector Arguments(1, Builder.getInt8PtrTy()); + FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); + Function *FN = Function::Create(FT, Function::InternalLinkage, + F->getName() + ".omp_subfn", M); + // Do not run any polly pass on the new function. + P->getAnalysis().markFunctionAsInvalid(FN); + + Function::arg_iterator AI = FN->arg_begin(); + AI->setName("omp.userContext"); + + return FN; +} + +Value *OMPGenerator::loadValuesIntoStruct(SetVector &Values) { + std::vector Members; + + for (unsigned i = 0; i < Values.size(); i++) + Members.push_back(Values[i]->getType()); + + StructType *Ty = StructType::get(Builder.getContext(), Members); + Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext"); + + for (unsigned i = 0; i < Values.size(); i++) { + Value *Address = Builder.CreateStructGEP(Struct, i); + Builder.CreateStore(Values[i], Address); + } + + return Struct; +} + +void OMPGenerator::extractValuesFromStruct(SetVector OldValues, + Value *Struct, + ValueToValueMapTy &Map) { + for (unsigned i = 0; i < OldValues.size(); i++) { + Value *Address = Builder.CreateStructGEP(Struct, i); + Value *NewValue = Builder.CreateLoad(Address); + Map.insert(std::make_pair(OldValues[i], NewValue)); + } +} + +Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData, + SetVector Data, + ValueToValueMapTy &Map, + Function **SubFunction) { + Function *FN = createSubfunctionDefinition(); + + BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB, + *AfterBB; + Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule, + *LowerBound, *UpperBound, *IV; + Type *IntPtrTy = getIntPtrTy(); + LLVMContext &Context = FN->getContext(); + + // Store the previous basic block. + PrevBB = Builder.GetInsertBlock(); + + // Create basic blocks. + HeaderBB = BasicBlock::Create(Context, "omp.setup", FN); + ExitBB = BasicBlock::Create(Context, "omp.exit", FN); + CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN); + LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN); + + DominatorTree &DT = P->getAnalysis(); + DT.addNewBlock(HeaderBB, PrevBB); + DT.addNewBlock(ExitBB, HeaderBB); + DT.addNewBlock(CheckNextBB, HeaderBB); + DT.addNewBlock(LoadIVBoundsBB, HeaderBB); + + // Fill up basic block HeaderBB. + Builder.SetInsertPoint(HeaderBB); + LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr"); + UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr"); + UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(), + "omp.userContext"); + + extractValuesFromStruct(Data, UserContext, Map); + Builder.CreateBr(CheckNextBB); + + // Add code to check if another set of iterations will be executed. + Builder.SetInsertPoint(CheckNextBB); + Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr); + HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), + "omp.hasNextScheduleBlock"); + Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB); + + // Add code to to load the iv bounds for this set of iterations. + Builder.SetInsertPoint(LoadIVBoundsBB); + LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound"); + UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound"); + + // Subtract one as the upper bound provided by openmp is a < comparison + // whereas the codegenForSequential function creates a <= comparison. + UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1), + "omp.upperBoundAdjusted"); + + Builder.CreateBr(CheckNextBB); + Builder.SetInsertPoint(--Builder.GetInsertPoint()); + IV = createLoop(LowerBound, UpperBound, Stride, &Builder, P, &AfterBB); + + BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); + Builder.SetInsertPoint(AfterBB->begin()); + + // Add code to terminate this openmp subfunction. + Builder.SetInsertPoint(ExitBB); + createCallLoopEndNowait(); + Builder.CreateRetVoid(); + + Builder.SetInsertPoint(LoopBody); + *SubFunction = FN; + + return IV; +} + +Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound, + Value *Stride, + SetVector &Values, + ValueToValueMapTy &Map, + BasicBlock::iterator *LoopBody) { + Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads; + Function *SubFunction; + + Struct = loadValuesIntoStruct(Values); + + BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint(); + IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction); + *LoopBody = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PrevInsertPoint); + + // Create call for GOMP_parallel_loop_runtime_start. + SubfunctionParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), + "omp_data"); + + NumberOfThreads = Builder.getInt32(0); + + // Add one as the upper bound provided by openmp is a < comparison + // whereas the codegenForSequential function creates a <= comparison. + UpperBound = Builder.CreateAdd(UpperBound, + ConstantInt::get(getIntPtrTy(), 1)); + + createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads, + LowerBound, UpperBound, Stride); + Builder.CreateCall(SubFunction, SubfunctionParam); + createCallParallelEnd(); + + return IV; +}