Add scalar and phi code generation

To reduce compile time and to allow more and better quality SCoPs in
  the long run we introduced scalar dependences and PHI-modeling. This
  patch now allows us to generate code if one or both of those options
  are set. While the principle of demoting scalars as well as PHIs to
  memory in order to communicate their values stays the same, we can
  now delay the demotion until the very end (the actual code
  generation). Consequently:
    - We __almost__ do not modify the code if we do not generate code
      for an optimized SCoP in the end. Thus, the early exit as well as
      the unprofitable option will now actually prevent us from
      introducing regressions in cases where we would probably not get
      better code anyway.
    - Polly can be used as a "pure" analyzer tool as long as the code
      generator is set to none.
    - The original SCoP is barely touched when the optimized version is
      placed next to it. Runtime regressions are not to be expected if
      the runtime check chooses the original version, and later
      optimizations do not need to revert the demotion for that part.
    - We generate direct accesses to the demoted values; thus there are
      no "trivial GEPs" that select the first element of a scalar we
      demoted and treated as an array (see the sketch below).
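
  The following is only an illustration of that last point, not code
  from this patch; the value names are made up. A scalar %val demoted
  to a stack slot %val.s2a used to be accessed through a GEP selecting
  element zero of a single-element array, whereas it is now accessed
  directly:

    ; before: the demoted scalar was treated as a single-element array,
    ; so every access went through a GEP selecting element 0
    %val.s2a.gep = getelementptr i32, i32* %val.s2a, i32 0
    store i32 %x, i32* %val.s2a.gep

    ; now: the demoted location is accessed directly
    store i32 %x, i32* %val.s2a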

Differential Revision: http://reviews.llvm.org/D7513

llvm-svn: 238070
Johannes Doerfert 2015-05-22 23:43:58 +00:00
parent 755d58a463
commit ecff11dcfb
13 changed files with 1373 additions and 50 deletions


@@ -63,6 +63,24 @@ bool isIgnoredIntrinsic(const llvm::Value *V);
/// @brief Generate a new basic block for a polyhedral statement.
class BlockGenerator {
public:
/// @brief Map types to resolve scalar dependences.
///
///@{
/// @see The ScalarMap and PHIOpMap member.
using ScalarAllocaMapTy = DenseMap<Instruction *, AllocaInst *>;
/// @brief Simple vector of instructions to store escape users.
using EscapeUserVectorTy = SmallVector<Instruction *, 4>;
/// @brief Map type to resolve escaping users for scalar instructions.
///
/// @see The EscapeMap member.
using EscapeUsersAllocaMapTy =
DenseMap<Instruction *, std::pair<AllocaInst *, EscapeUserVectorTy>>;
///@}
/// @brief Create a generator for basic blocks.
///
/// @param Builder The LLVM-IR Builder used to generate the statement. The
@@ -71,9 +89,14 @@ public:
/// @param LI The loop info for the current function
/// @param SE The scalar evolution info for the current function
/// @param DT The dominator tree of this function.
/// @param ScalarMap Map from scalars to their demoted location.
/// @param PHIOpMap Map from PHIs to their demoted operand location.
/// @param EscapeMap Map from scalars to their escape users and locations.
/// @param ExprBuilder An expression builder to generate new access functions.
BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
-DominatorTree &DT, IslExprBuilder *ExprBuilder = nullptr);
+DominatorTree &DT, ScalarAllocaMapTy &ScalarMap,
+ScalarAllocaMapTy &PHIOpMap, EscapeUsersAllocaMapTy &EscapeMap,
+IslExprBuilder *ExprBuilder = nullptr);
/// @brief Copy the basic block.
///
@@ -87,6 +110,18 @@ public:
/// @param LTS A map from old loops to new induction variables as SCEVs.
void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT &LTS);
/// @brief Finalize the code generation for the SCoP @p S.
///
/// This will initialize and finalize the scalar variables we demoted during
/// the code generation.
///
/// @see createScalarInitialization(Region &, ValueMapT &)
/// @see createScalarFinalization(Region &)
void finalizeSCoP(Scop &S, ValueMapT &VMap);
/// @brief An empty destructor
virtual ~BlockGenerator(){};
protected:
PollyIRBuilder &Builder;
LoopInfo &LI;
@@ -96,6 +131,42 @@ protected:
/// @brief The dominator tree of this function.
DominatorTree &DT;
/// @brief The entry block of the current function.
BasicBlock *EntryBB;
/// @brief Maps to resolve scalar dependences for PHI operands and scalars.
///
/// Usage example:
///
/// x1 = ... // x1 will be inserted in the ScalarMap and PhiOpMap.
/// for (i=0...N) {
/// x2 = phi(x1, add) // x2 will be inserted in the ScalarMap, x1 and
/// // add are mapped in the PHIOpMap.
/// add = x2 + A[i]; // add will be inserted in the ScalarMap and
/// // the PhiOpMap.
/// }
/// print(x1) // x1 is mapped in the ScalarMap.
/// print(x2) // x2 is mapped in the ScalarMap.
/// print(add) // add is mapped in the ScalarMap.
///
///{
/// The PHIOpMap is used to get the alloca to communicate a value to a PHI
/// node, hence when the operand of a PHI is demoted the corresponding write
/// access will use the PHIOpMap to look for the correct alloca. PHI nodes
/// will then read that location in order to get the correct/current operand
/// value.
ScalarAllocaMapTy &PHIOpMap;
/// The ScalarMap is used in __all__ other cases, thus always when a scalar
/// variable is read/written and the write is not because the scalar is a PHI
/// operand.
ScalarAllocaMapTy &ScalarMap;
///}
/// @brief Map from instructions to their escape users as well as the alloca.
EscapeUsersAllocaMapTy &EscapeMap;
/// @brief Split @p BB to create a new one we can use to clone @p BB in.
BasicBlock *splitBB(BasicBlock *BB);
@@ -128,6 +199,64 @@ protected:
void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS);
/// @brief Return the alloca for @p ScalarBase in @p Map.
///
/// If no alloca was mapped to @p ScalarBase in @p Map a new one is created
/// and named after @p ScalarBase with the suffix @p NameExt.
///
/// @param ScalarBase The demoted scalar instruction.
/// @param Map The map we should look for a mapped alloca instruction.
/// @param NameExt The suffix we add to the name of a new created alloca.
/// @param IsNew If set it will hold true iff the alloca was created.
///
/// @returns The alloca for @p ScalarBase in @p Map.
AllocaInst *getOrCreateAlloca(Instruction *ScalarBase, ScalarAllocaMapTy &Map,
const char *NameExt = ".s2a",
bool *IsNew = nullptr);
/// @brief Generate reload of scalars demoted to memory and needed by @p Inst.
///
/// @param Stmt The statement we generate code for.
/// @param Inst The instruction that might need reloaded values.
/// @param BBMap A mapping from old values to their new values in this block.
virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst,
ValueMapT &BBMap);
/// @brief Generate the scalar stores for the given statement.
///
/// After the statement @p Stmt was copied all inner-SCoP scalar dependences
/// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
/// be demoted to memory.
///
/// @param Stmt The statement we generate code for.
/// @param BB The basic block we generate code for.
/// @param BBMap A mapping from old values to their new values in this block.
/// @param GlobalMap A mapping for globally replaced values.
virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
ValueMapT &BBMAp, ValueMapT &GlobalMap);
/// @brief Handle users of @p Inst outside the SCoP.
///
/// @param R The current SCoP region.
/// @param Inst The current instruction we check.
/// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP.
void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy);
/// @brief Initialize the memory of demoted scalars.
///
/// If a PHI node was demoted and one of its predecessor blocks was outside
/// the SCoP we need to initialize the memory cell we demoted the PHI into
/// with the value corresponding to that predecessor. As a SCoP is a
/// __single__ entry region there is at most one such predecessor.
void createScalarInitialization(Region &R, ValueMapT &VMap);
/// @brief Promote the values of demoted scalars after the SCoP.
///
/// If a scalar value was used outside the SCoP we need to promote the value
/// stored in the memory cell allocated for that scalar and combine it with
/// the original value in the non-optimized SCoP.
void createScalarFinalization(Region &R);
/// @brief Get the new version of a value.
///
/// Given an old value, we first check if a new version of this value is
@@ -183,6 +312,17 @@ protected:
ValueMapT &BBMap, ValueMapT &GlobalMap,
LoopToScevMapT &LTS);
/// @brief Copy a single PHI instruction.
///
/// The implementation in the BlockGenerator is trivial, however it allows
/// subclasses to handle PHIs different.
///
/// @returns The nullptr as the BlockGenerator does not copy PHIs.
virtual Value *copyPHIInstruction(ScopStmt &, const PHINode *, ValueMapT &,
ValueMapT &, LoopToScevMapT &) {
return nullptr;
}
/// @brief Copy a single Instruction.
///
/// This copies a single Instruction and updates references to old values
@@ -202,6 +342,22 @@ protected:
void copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
ValueMapT &BBMap, ValueMapT &GlobalMap,
LoopToScevMapT &LTS);
/// @brief Helper to get the newest version of @p ScalarValue.
///
/// @param ScalarValue The original value needed.
/// @param R The current SCoP region.
/// @param ReloadMap The scalar map for demoted values.
/// @param BBMap A mapping from old values to their new values
/// (for values recalculated within this basic block).
/// @param GlobalMap A mapping from old values to their new values
/// (for values recalculated in the new ScoP, but not
/// within this basic block).
///
/// @returns The newest version (e.g., reloaded) of the scalar value.
Value *getNewScalarValue(Value *ScalarValue, const Region &R,
ScalarAllocaMapTy &ReloadMap, ValueMapT &BBMap,
ValueMapT &GlobalMap);
};
/// @brief Generate a new vector basic block for a polyhedral statement.
@@ -374,12 +530,82 @@ public:
/// @param LTS A map from old loops to new induction variables as SCEVs.
void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT &LTS);
/// @brief An empty destructor
virtual ~RegionGenerator(){};
private:
/// @brief A map from old to new blocks in the region.
DenseMap<BasicBlock *, BasicBlock *> BlockMap;
/// @brief The "BBMaps" for the whole region (one for each block).
DenseMap<BasicBlock *, ValueMapT> RegionMaps;
/// @brief Mapping to remember PHI nodes that still need incoming values.
using PHINodePairTy = std::pair<const PHINode *, PHINode *>;
DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;
/// @brief Repair the dominance tree after we created a copy block for @p BB.
///
/// @returns The immediate dominator in the DT for @p BBCopy if in the region.
-BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy,
-DenseMap<BasicBlock *, BasicBlock *> &BlockMap);
+BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
/// @brief Add the new operand from the copy of @p IncomingBB to @p PHICopy.
///
/// @param Stmt The statement to code generate.
/// @param PHI The original PHI we copy.
/// @param PHICopy The copy of @p PHI.
/// @param IncomingBB An incoming block of @p PHI.
/// @param GlobalMap A mapping from old values to their new values
/// (for values recalculated in the new ScoP, but not
/// within this basic block).
/// @param LTS A map from old loops to new induction variables as
/// SCEVs.
void addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy,
BasicBlock *IncomingBB, ValueMapT &GlobalMap,
LoopToScevMapT &LTS);
/// @brief Generate reload of scalars demoted to memory and needed by @p Inst.
///
/// @param Stmt The statement we generate code for.
/// @param Inst The instruction that might need reloaded values.
/// @param BBMap A mapping from old values to their new values in this block.
virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst,
ValueMapT &BBMap) override;
/// @brief Generate the scalar stores for the given statement.
///
/// After the statement @p Stmt was copied all inner-SCoP scalar dependences
/// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
/// be demoted to memory.
///
/// @param Stmt The statement we generate code for.
/// @param BB The basic block we generate code for.
/// @param BBMap A mapping from old values to their new values in this block.
/// @param GlobalMap A mapping from old values to their new values
/// (for values recalculated in the new ScoP, but not
/// within this basic block).
virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
ValueMapT &BBMAp,
ValueMapT &GlobalMap) override;
/// @brief Copy a single PHI instruction.
///
/// This copies a single PHI instruction and updates references to old values
/// with references to new values, as defined by GlobalMap and BBMap.
///
/// @param Stmt The statement to code generate.
/// @param PHI The PHI instruction to copy.
/// @param BBMap A mapping from old values to their new values
/// (for values recalculated within this basic block).
/// @param GlobalMap A mapping from old values to their new values
/// (for values recalculated in the new ScoP, but not
/// within this basic block).
/// @param LTS A map from old loops to new induction variables as SCEVs.
///
/// @returns The copied instruction or nullptr if no copy was made.
virtual Value *copyPHIInstruction(ScopStmt &Stmt, const PHINode *Inst,
ValueMapT &BBMap, ValueMapT &GlobalMap,
LoopToScevMapT &LTS) override;
};
}
#endif


@@ -32,13 +32,20 @@
DominatorTree &DT, Scop &S)
: S(S), Builder(Builder), Annotator(Annotator), Rewriter(SE, DL, "polly"),
ExprBuilder(Builder, IDToValue, Rewriter, DT, LI),
-BlockGen(Builder, LI, SE, DT, &ExprBuilder), RegionGen(BlockGen), P(P),
-DL(DL), LI(LI), SE(SE), DT(DT) {}
+BlockGen(Builder, LI, SE, DT, ScalarMap, PHIOpMap, EscapeMap,
+&ExprBuilder),
+RegionGen(BlockGen), P(P), DL(DL), LI(LI), SE(SE), DT(DT) {}
~IslNodeBuilder() {}
void addParameters(__isl_take isl_set *Context);
void create(__isl_take isl_ast_node *Node);
/// @brief Finalize code generation for the SCoP @p S.
///
/// @see BlockGenerator::finalizeSCoP(Scop &S)
void finalizeSCoP(Scop &S) { BlockGen.finalizeSCoP(S, ValueMap); }
IslExprBuilder &getExprBuilder() { return ExprBuilder; }
private:
@@ -50,9 +57,26 @@ private:
SCEVExpander Rewriter;
IslExprBuilder ExprBuilder;
/// @brief Maps used by the block and region generator to demote scalars.
///
///@{
/// @brief See BlockGenerator::ScalarMap.
BlockGenerator::ScalarAllocaMapTy ScalarMap;
/// @brief See BlockGenerator::PhiOpMap.
BlockGenerator::ScalarAllocaMapTy PHIOpMap;
/// @brief See BlockGenerator::EscapeMap.
BlockGenerator::EscapeUsersAllocaMapTy EscapeMap;
///@}
/// @brief The generator used to copy a basic block.
BlockGenerator BlockGen;
-/// @brief Generator for region statements.
+/// @brief The generator used to copy a non-affine region.
RegionGenerator RegionGen;
Pass *const P;


@@ -25,6 +25,8 @@
#include "llvm/Analysis/RegionPass.h"
#include "isl/ctx.h"
#include <forward_list>
using namespace llvm;
namespace llvm {
@@ -410,7 +412,11 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
/// accesses.
/// At the moment every statement represents a single basic block of LLVM-IR.
class ScopStmt {
-//===-------------------------------------------------------------------===//
+public:
+/// @brief List to hold all (scalar) memory accesses mapped to an instruction.
+using MemoryAccessList = std::forward_list<MemoryAccess>;
+private:
ScopStmt(const ScopStmt &) = delete;
const ScopStmt &operator=(const ScopStmt &) = delete;
@@ -476,7 +482,9 @@ class ScopStmt {
/// The only side effects of a statement are its memory accesses.
typedef SmallVector<MemoryAccess *, 8> MemoryAccessVec;
MemoryAccessVec MemAccs;
-std::map<const Instruction *, MemoryAccess *> InstructionToAccess;
+/// @brief Mapping from instructions to (scalar) memory accesses.
+DenseMap<const Instruction *, MemoryAccessList *> InstructionToAccess;
//@}
@@ -628,16 +636,31 @@ public:
/// @brief Return true if this statement represents a whole region.
bool isRegionStmt() const { return R != nullptr; }
-const MemoryAccess &getAccessFor(const Instruction *Inst) const {
-MemoryAccess *A = lookupAccessFor(Inst);
-assert(A && "Cannot get memory access because it does not exist!");
-return *A;
+/// @brief Return the (scalar) memory accesses for @p Inst.
+const MemoryAccessList &getAccessesFor(const Instruction *Inst) const {
+MemoryAccessList *MAL = lookupAccessesFor(Inst);
+assert(MAL && "Cannot get memory accesses because they do not exist!");
+return *MAL;
}
/// @brief Return the (scalar) memory accesses for @p Inst if any.
MemoryAccessList *lookupAccessesFor(const Instruction *Inst) const {
auto It = InstructionToAccess.find(Inst);
return It == InstructionToAccess.end() ? nullptr : It->getSecond();
}
/// @brief Return the __first__ (scalar) memory access for @p Inst.
const MemoryAccess &getAccessFor(const Instruction *Inst) const {
MemoryAccess *MA = lookupAccessFor(Inst);
assert(MA && "Cannot get memory access because it does not exist!");
return *MA;
}
/// @brief Return the __first__ (scalar) memory access for @p Inst if any.
MemoryAccess *lookupAccessFor(const Instruction *Inst) const {
-std::map<const Instruction *, MemoryAccess *>::const_iterator at =
-InstructionToAccess.find(Inst);
-return at == InstructionToAccess.end() ? NULL : at->second;
+auto It = InstructionToAccess.find(Inst);
+return It == InstructionToAccess.end() ? nullptr
+: &It->getSecond()->front();
}
void setBasicBlock(BasicBlock *Block) {


@@ -27,6 +27,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopInfo.h"
@ -877,19 +878,11 @@ void ScopStmt::buildAccesses(TempScop &tempScop, BasicBlock *Block,
if (isApproximated && Access.isWrite()) if (isApproximated && Access.isWrite())
Access.setMayWrite(); Access.setMayWrite();
-MemAccs.push_back(
-new MemoryAccess(Access, AccessInst, this, SAI, MemAccs.size()));
-// We do not track locations for scalar memory accesses at the moment.
-//
-// We do not have a use for this information at the moment. If we need this
-// at some point, the "instruction -> access" mapping needs to be enhanced
-// as a single instruction could then possibly perform multiple accesses.
-if (!Access.isScalar()) {
-assert(!InstructionToAccess.count(AccessInst) &&
-"Unexpected 1-to-N mapping on instruction to access map!");
-InstructionToAccess[AccessInst] = MemAccs.back();
-}
+MemoryAccessList *&MAL = InstructionToAccess[AccessInst];
+if (!MAL)
+MAL = new MemoryAccessList();
+MAL->emplace_front(Access, AccessInst, this, SAI, MemAccs.size());
+MemAccs.push_back(&MAL->front());
}
}
@@ -1258,11 +1251,7 @@ __isl_give isl_id *ScopStmt::getDomainId() const {
}
ScopStmt::~ScopStmt() {
-while (!MemAccs.empty()) {
-delete MemAccs.back();
-MemAccs.pop_back();
-}
+DeleteContainerSeconds(InstructionToAccess);
isl_set_free(Domain);
isl_map_free(Schedule);
}


@@ -81,8 +81,13 @@ bool polly::isIgnoredIntrinsic(const Value *V) {
BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI,
ScalarEvolution &SE, DominatorTree &DT,
ScalarAllocaMapTy &ScalarMap,
ScalarAllocaMapTy &PHIOpMap,
EscapeUsersAllocaMapTy &EscapeMap,
IslExprBuilder *ExprBuilder)
-: Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT) {}
+: Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
+EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
+EscapeMap(EscapeMap) {}
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
ValueMapT &BBMap, ValueMapT &GlobalMap,
@@ -242,13 +247,22 @@ Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt,
void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
ValueMapT &BBMap, ValueMapT &GlobalMap,
LoopToScevMapT &LTS) {
// First check for possible scalar dependences for this instruction.
generateScalarLoads(Stmt, Inst, BBMap);
// Terminator instructions control the control flow. They are explicitly
// expressed in the clast and do not need to be copied.
if (Inst->isTerminator())
return;
-if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
+Loop *L = getLoopForInst(Inst);
+if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
+canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) {
+Value *NewValue = getNewValue(Stmt, Inst, BBMap, GlobalMap, LTS, L);
+BBMap[Inst] = NewValue;
return;
}
if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS);
@@ -266,6 +280,11 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
return;
}
if (const PHINode *PHI = dyn_cast<PHINode>(Inst)) {
copyPHIInstruction(Stmt, PHI, BBMap, GlobalMap, LTS);
return;
}
// Skip some special intrinsics for which we do not adjust the semantics to
// the new schedule. All others are handled like every other instruction.
if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) {
@@ -323,8 +342,329 @@ void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
ValueMapT &BBMap, ValueMapT &GlobalMap,
LoopToScevMapT &LTS) {
Builder.SetInsertPoint(CopyBB->begin());
EntryBB = &CopyBB->getParent()->getEntryBlock();
for (Instruction &Inst : *BB)
copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS);
// After a basic block was copied store all scalars that escape this block
// in their alloca. First the scalars that have dependences inside the SCoP,
// then the ones that might escape the SCoP.
generateScalarStores(Stmt, BB, BBMap, GlobalMap);
const Region &R = Stmt.getParent()->getRegion();
for (Instruction &Inst : *BB)
handleOutsideUsers(R, &Inst, BBMap[&Inst]);
}
AllocaInst *BlockGenerator::getOrCreateAlloca(Instruction *ScalarBase,
ScalarAllocaMapTy &Map,
const char *NameExt,
bool *IsNew) {
// Check if an alloca was cached for the base instruction.
AllocaInst *&Addr = Map[ScalarBase];
// If needed indicate if it was found already or will be created.
if (IsNew)
*IsNew = (Addr == nullptr);
// If no alloca was found create one and insert it in the entry block.
if (!Addr) {
auto *Ty = ScalarBase->getType();
Addr = new AllocaInst(Ty, ScalarBase->getName() + NameExt);
Addr->insertBefore(EntryBB->getFirstInsertionPt());
}
return Addr;
}
void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst,
Value *InstCopy) {
BasicBlock *ExitBB = R.getExit();
EscapeUserVectorTy EscapeUsers;
for (User *U : Inst->users()) {
// Non-instruction user will never escape.
Instruction *UI = dyn_cast<Instruction>(U);
if (!UI)
continue;
if (R.contains(UI) && ExitBB != UI->getParent())
continue;
EscapeUsers.push_back(UI);
}
// Exit if no escape uses were found.
if (EscapeUsers.empty())
return;
// If there are escape users we get the alloca for this instruction and put
// it in the EscapeMap for later finalization. However, if the alloca was not
// created by an already handled scalar dependence we have to initialize it
// also. Lastly, if the instruction was copied multiple times we already did
// this and can exit.
if (EscapeMap.count(Inst))
return;
// Get or create an escape alloca for this instruction.
bool IsNew;
AllocaInst *ScalarAddr =
getOrCreateAlloca(Inst, ScalarMap, ".escape", &IsNew);
// Remember that this instruction has escape uses and the escape alloca.
EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
// If the escape alloca was just created store the instruction in there,
// otherwise that happened already.
if (IsNew) {
assert(InstCopy && "Except PHIs every instruction should have a copy!");
Builder.CreateStore(InstCopy, ScalarAddr);
}
}
void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
const Instruction *Inst,
ValueMapT &BBMap) {
// Iterate over all memory accesses for the given instruction and handle all
// scalar reads.
if (ScopStmt::MemoryAccessList *MAL = Stmt.lookupAccessesFor(Inst)) {
for (MemoryAccess &MA : *MAL) {
if (!MA.isScalar() || !MA.isRead())
continue;
Instruction *ScalarBase = cast<Instruction>(MA.getBaseAddr());
Instruction *ScalarInst = MA.getAccessInstruction();
PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase);
// This is either a common scalar use (second case) or the use of a phi
// operand by the PHI node (first case).
if (ScalarBasePHI == ScalarInst) {
AllocaInst *PHIOpAddr =
getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
LoadInst *LI =
Builder.CreateLoad(PHIOpAddr, PHIOpAddr->getName() + ".reload");
BBMap[ScalarBase] = LI;
} else {
// For non-PHI operand uses we look up the alloca in the ScalarMap,
// reload it and add the mapping to the ones in the current basic block.
AllocaInst *ScalarAddr =
getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
LoadInst *LI =
Builder.CreateLoad(ScalarAddr, ScalarAddr->getName() + ".reload");
BBMap[ScalarBase] = LI;
}
}
}
}
Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R,
ScalarAllocaMapTy &ReloadMap,
ValueMapT &BBMap,
ValueMapT &GlobalMap) {
// If the value we want to store is an instruction we might have demoted it
// in order to make it accessible here. In such a case a reload is
// necessary. If it is no instruction it will always be a value that
// dominates the current point and we can just use it. In total there are 4
// options:
// (1) The value is no instruction ==> use the value.
// (2) The value is an instruction that was split out of the region prior to
// code generation ==> use the instruction as it dominates the region.
// (3) The value is an instruction:
// (a) The value was defined in the current block, thus a copy is in
// the BBMap ==> use the mapped value.
// (b) The value was defined in a previous block, thus we demoted it
// earlier ==> use the reloaded value.
Instruction *ScalarValueInst = dyn_cast<Instruction>(ScalarValue);
if (!ScalarValueInst)
return ScalarValue;
if (!R.contains(ScalarValueInst)) {
if (Value *ScalarValueCopy = GlobalMap.lookup(ScalarValueInst))
return /* Case (3a) */ ScalarValueCopy;
else
return /* Case 2 */ ScalarValue;
}
if (Value *ScalarValueCopy = BBMap.lookup(ScalarValueInst))
return /* Case (3a) */ ScalarValueCopy;
// Case (3b)
assert(ReloadMap.count(ScalarValueInst) &&
"ScalarInst not mapped in the block and not in the given reload map!");
Value *ReloadAddr = ReloadMap[ScalarValueInst];
ScalarValue =
Builder.CreateLoad(ReloadAddr, ReloadAddr->getName() + ".reload");
return ScalarValue;
}
void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
ValueMapT &BBMap,
ValueMapT &GlobalMap) {
const Region &R = Stmt.getParent()->getRegion();
assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() &&
"Region statements need to use the generateScalarStores() "
"function in the RegionGenerator");
// Set to remember a store to the phiops alloca of a PHINode. It is needed as
// we might have multiple write accesses to the same PHI and while one is the
// self write of the PHI (to the ScalarMap alloca) the other is the write to
// the operand alloca (PHIOpMap).
SmallPtrSet<PHINode *, 4> SeenPHIs;
// Iterate over all accesses in the given statement.
for (MemoryAccess *MA : Stmt) {
// Skip non-scalar and read accesses.
if (!MA->isScalar() || MA->isRead())
continue;
Instruction *ScalarBase = cast<Instruction>(MA->getBaseAddr());
Instruction *ScalarInst = MA->getAccessInstruction();
PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase);
// Get the alloca node for the base instruction and the value we want to
// store. In total there are 4 options:
// (1) The base is no PHI, hence it is a simple scalar def-use chain.
// (2) The base is a PHI,
// (a) and the write is caused by an operand in the block.
// (b) and it is the PHI self write (same as case (1)).
// (c) (2a) and (2b) are not distinguishable.
// For case (1) and (2b) we get the alloca from the scalar map and the value
// we want to store is initialized with the instruction attached to the
// memory access. For case (2a) we get the alloca from the PHI operand map
// and the value we want to store is initialized with the incoming value for
// this block. The tricky case (2c) is when both (2a) and (2b) match. This
// happens if the PHI operand is in the same block as the PHI. To handle
// that we choose the alloca of (2a) first and (2b) for the next write
// access to that PHI (there must be 2).
Value *ScalarValue = nullptr;
AllocaInst *ScalarAddr = nullptr;
if (!ScalarBasePHI) {
// Case (1)
ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
ScalarValue = ScalarInst;
} else {
int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB);
if (ScalarBasePHI != ScalarInst) {
// Case (2a)
assert(PHIIdx >= 0 && "Bad scalar write to PHI operand");
SeenPHIs.insert(ScalarBasePHI);
ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx);
} else if (PHIIdx < 0) {
// Case (2b)
ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
ScalarValue = ScalarInst;
} else {
// Case (2c)
if (SeenPHIs.insert(ScalarBasePHI).second) {
// First access ==> same as (2a)
ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx);
} else {
// Second access ==> same as (2b)
ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
ScalarValue = ScalarInst;
}
}
}
ScalarValue =
getNewScalarValue(ScalarValue, R, ScalarMap, BBMap, GlobalMap);
Builder.CreateStore(ScalarValue, ScalarAddr);
}
}
void BlockGenerator::createScalarInitialization(Region &R,
ValueMapT &GlobalMap) {
// The split block __just before__ the region and optimized region.
BasicBlock *SplitBB = R.getEnteringBlock();
BranchInst *SplitBBTerm = cast<BranchInst>(SplitBB->getTerminator());
assert(SplitBBTerm->getNumSuccessors() == 2 && "Bad region entering block!");
// Get the start block of the __optimized__ region.
BasicBlock *StartBB = SplitBBTerm->getSuccessor(0);
if (StartBB == R.getEntry())
StartBB = SplitBBTerm->getSuccessor(1);
// For each PHI predecessor outside the region store the incoming operand
// value prior to entering the optimized region.
Builder.SetInsertPoint(StartBB->getTerminator());
ScalarAllocaMapTy EmptyMap;
for (const auto &PHIOpMapping : PHIOpMap) {
const PHINode *PHI = cast<PHINode>(PHIOpMapping.getFirst());
// Check if this PHI has the split block as predecessor (that is the only
// possible predecessor outside the SCoP).
int idx = PHI->getBasicBlockIndex(SplitBB);
if (idx < 0)
continue;
Value *ScalarValue = PHI->getIncomingValue(idx);
ScalarValue =
getNewScalarValue(ScalarValue, R, EmptyMap, GlobalMap, GlobalMap);
// If the split block is the predecessor initialize the PHI operator alloca.
Builder.CreateStore(ScalarValue, PHIOpMapping.getSecond());
}
}
void BlockGenerator::createScalarFinalization(Region &R) {
// The exit block of the __unoptimized__ region.
BasicBlock *ExitBB = R.getExitingBlock();
// The merge block __just after__ the region and the optimized region.
BasicBlock *MergeBB = R.getExit();
// The exit block of the __optimized__ region.
BasicBlock *OptExitBB = *(pred_begin(MergeBB));
if (OptExitBB == ExitBB)
OptExitBB = *(++pred_begin(MergeBB));
Builder.SetInsertPoint(OptExitBB->getTerminator());
for (const auto &EscapeMapping : EscapeMap) {
// Extract the escaping instruction and the escaping users as well as the
// alloca the instruction was demoted to.
Instruction *EscapeInst = EscapeMapping.getFirst();
const auto &EscapeMappingValue = EscapeMapping.getSecond();
const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
AllocaInst *ScalarAddr = EscapeMappingValue.first;
// Reload the demoted instruction in the optimized version of the SCoP.
Instruction *EscapeInstReload =
Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
// Create the merge PHI that merges the optimized and unoptimized version.
PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
EscapeInst->getName() + ".merge");
MergePHI->insertBefore(MergeBB->getFirstInsertionPt());
// Add the respective values to the merge PHI.
MergePHI->addIncoming(EscapeInstReload, OptExitBB);
MergePHI->addIncoming(EscapeInst, ExitBB);
// The information of scalar evolution about the escaping instruction needs
// to be revoked so the new merged instruction will be used.
if (SE.isSCEVable(EscapeInst->getType()))
SE.forgetValue(EscapeInst);
// Replace all uses of the demoted instruction with the merge PHI.
for (Instruction *EUser : EscapeUsers)
EUser->replaceUsesOfWith(EscapeInst, MergePHI);
}
}
void BlockGenerator::finalizeSCoP(Scop &S, ValueMapT &GlobalMap) {
createScalarInitialization(S.getRegion(), GlobalMap);
createScalarFinalization(S.getRegion());
}
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
@@ -679,9 +1019,8 @@ void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) {
copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap);
}
-BasicBlock *RegionGenerator::repairDominance(
-BasicBlock *BB, BasicBlock *BBCopy,
-DenseMap<BasicBlock *, BasicBlock *> &BlockMap) {
+BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
+BasicBlock *BBCopy) {
BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom);
@@ -697,20 +1036,31 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
assert(Stmt.isRegionStmt() &&
"Only region statements can be copied by the block generator");
// Forget all old mappings.
BlockMap.clear();
RegionMaps.clear();
IncompletePHINodeMap.clear();
// The region represented by the statement.
Region *R = Stmt.getRegion();
-// The "BBMaps" for the whole region.
-DenseMap<BasicBlock *, ValueMapT> RegionMaps;
-// A map from old to new blocks in the region
-DenseMap<BasicBlock *, BasicBlock *> BlockMap;
+// Create a dedicated entry for the region where we can reload all demoted
+// inputs.
+BasicBlock *EntryBB = R->getEntry();
+BasicBlock *EntryBBCopy =
+SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
+EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
+Builder.SetInsertPoint(EntryBBCopy->begin());
+for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
+if (!R->contains(*PI))
+BlockMap[*PI] = EntryBBCopy;
// Iterate over all blocks in the region in a breadth-first search.
std::deque<BasicBlock *> Blocks;
SmallPtrSet<BasicBlock *, 8> SeenBlocks;
-Blocks.push_back(R->getEntry());
-SeenBlocks.insert(R->getEntry());
+Blocks.push_back(EntryBB);
+SeenBlocks.insert(EntryBB);
while (!Blocks.empty()) {
BasicBlock *BB = Blocks.front();
@@ -718,7 +1068,10 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
// First split the block and update dominance information.
BasicBlock *BBCopy = splitBB(BB);
-BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy, BlockMap);
+BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);
// In order to remap PHI nodes we store also basic block mappings.
BlockMap[BB] = BBCopy;
// Get the mapping for this block and initialize it with the mapping
// available at its immediate dominator (in the new region).
@@ -728,22 +1081,28 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
// Copy the block with the BlockGenerator.
copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS);
// In order to remap PHI nodes we store also basic block mappings.
BlockMap[BB] = BBCopy;
// Add values to incomplete PHI nodes waiting for this block to be copied.
for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB,
GlobalMap, LTS);
IncompletePHINodeMap[BB].clear();
// And continue with new successors inside the region.
for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
if (R->contains(*SI) && SeenBlocks.insert(*SI).second)
Blocks.push_back(*SI);
// In order to remap PHI nodes we store also basic block mappings.
BlockMap[BB] = BBCopy;
}
// Now create a new dedicated region exit block and add it to the region map.
BasicBlock *ExitBBCopy =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
-ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".as.exit");
+ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
BlockMap[R->getExit()] = ExitBBCopy;
-repairDominance(R->getExit(), ExitBBCopy, BlockMap);
+repairDominance(R->getExit(), ExitBBCopy);
// As the block generator doesn't handle control flow we need to add the
// region control flow by hand after all blocks have been copied.
@@ -762,6 +1121,178 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap,
BICopy->eraseFromParent();
}
// Add counting PHI nodes to all loops in the region that can be used as
// replacement for SCEVs refering to the old loop.
for (BasicBlock *BB : SeenBlocks) {
Loop *L = LI.getLoopFor(BB);
if (L == nullptr || L->getHeader() != BB)
continue;
BasicBlock *BBCopy = BlockMap[BB];
Value *NullVal = Builder.getInt32(0);
PHINode *LoopPHI =
PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
LoopPHI->insertBefore(BBCopy->begin());
LoopPHIInc->insertBefore(BBCopy->getTerminator());
for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
if (!R->contains(PredBB))
continue;
if (L->contains(PredBB))
LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]);
else
LoopPHI->addIncoming(NullVal, BlockMap[PredBB]);
}
for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
LoopPHI->addIncoming(NullVal, PredBBCopy);
LTS[L] = SE.getUnknown(LoopPHI);
}
// Add all mappings from the region to the global map so outside uses will use
// the copied instructions.
for (auto &BBMap : RegionMaps)
GlobalMap.insert(BBMap.second.begin(), BBMap.second.end());
// Reset the old insert point for the build.
Builder.SetInsertPoint(ExitBBCopy->begin());
}
void RegionGenerator::generateScalarLoads(ScopStmt &Stmt,
const Instruction *Inst,
ValueMapT &BBMap) {
// Inside a non-affine region PHI nodes are copied not demoted. Once the
// phi is copied it will reload all inputs from outside the region, hence
// we do not need to generate code for the read access of the operands of a
// PHI.
if (isa<PHINode>(Inst))
return;
return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap);
}
void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
ValueMapT &BBMap,
ValueMapT &GlobalMap) {
const Region &R = Stmt.getParent()->getRegion();
Region *StmtR = Stmt.getRegion();
assert(StmtR && "Block statements need to use the generateScalarStores() "
"function in the BlockGenerator");
BasicBlock *ExitBB = StmtR->getExit();
// For region statements three kinds of scalar stores exists:
// (1) A definition used by a non-phi instruction outside the region.
// (2) A phi-instruction in the region entry.
// (3) A write to a phi instruction in the region exit.
// The last case is the tricky one since we do not know anymore which
// predecessor of the exit needs to store the operand value that doesn't
// have a definition in the region. Therefore, we have to check in each
// block in the region if we should store the value or not.
// Iterate over all accesses in the given statement.
for (MemoryAccess *MA : Stmt) {
// Skip non-scalar and read accesses.
if (!MA->isScalar() || MA->isRead())
continue;
Instruction *ScalarBase = cast<Instruction>(MA->getBaseAddr());
Instruction *ScalarInst = MA->getAccessInstruction();
PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase);
Value *ScalarValue = nullptr;
AllocaInst *ScalarAddr = nullptr;
if (!ScalarBasePHI) {
// Case (1)
ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
ScalarValue = ScalarInst;
} else if (ScalarBasePHI->getParent() != ExitBB) {
// Case (2)
assert(ScalarBasePHI->getParent() == StmtR->getEntry() &&
"Bad PHI self write in non-affine region");
assert(ScalarBase == ScalarInst &&
"Bad PHI self write in non-affine region");
ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");
ScalarValue = ScalarInst;
} else {
int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB);
// Skip accesses we will not handle in this basic block but in another one
// in the statement region.
if (PHIIdx < 0)
continue;
// Case (3)
ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx);
}
ScalarValue =
getNewScalarValue(ScalarValue, R, ScalarMap, BBMap, GlobalMap);
Builder.CreateStore(ScalarValue, ScalarAddr);
}
}
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI,
PHINode *PHICopy, BasicBlock *IncomingBB,
ValueMapT &GlobalMap,
LoopToScevMapT &LTS) {
Region *StmtR = Stmt.getRegion();
// If the incoming block was not yet copied mark this PHI as incomplete.
// Once the block will be copied the incoming value will be added.
BasicBlock *BBCopy = BlockMap[IncomingBB];
if (!BBCopy) {
assert(StmtR->contains(IncomingBB) &&
"Bad incoming block for PHI in non-affine region");
IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
return;
}
Value *OpCopy = nullptr;
if (StmtR->contains(IncomingBB)) {
assert(RegionMaps.count(BBCopy) &&
"Incoming PHI block did not have a BBMap");
ValueMapT &BBCopyMap = RegionMaps[BBCopy];
Value *Op = PHI->getIncomingValueForBlock(IncomingBB);
OpCopy =
getNewValue(Stmt, Op, BBCopyMap, GlobalMap, LTS, getLoopForInst(PHI));
} else {
if (PHICopy->getBasicBlockIndex(BBCopy) >= 0)
return;
AllocaInst *PHIOpAddr =
getOrCreateAlloca(const_cast<PHINode *>(PHI), PHIOpMap, ".phiops");
OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload",
BlockMap[IncomingBB]->getTerminator());
}
assert(OpCopy && "Incoming PHI value was not copied properly");
assert(BBCopy && "Incoming PHI block was not copied properly");
PHICopy->addIncoming(OpCopy, BBCopy);
}
Value *RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, const PHINode *PHI,
ValueMapT &BBMap,
ValueMapT &GlobalMap,
LoopToScevMapT &LTS) {
unsigned NumIncoming = PHI->getNumIncomingValues();
PHINode *PHICopy =
Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
BBMap[PHI] = PHICopy;
for (unsigned u = 0; u < NumIncoming; u++)
addOperandToPHI(Stmt, PHI, PHICopy, PHI->getIncomingBlock(u), GlobalMap,
LTS);
return PHICopy;
}


@@ -131,6 +131,8 @@ public:
NodeBuilder.create(AstRoot);
NodeBuilder.finalizeSCoP(S);
assert(!verifyGeneratedFunction(S, *EnteringBB->getParent()) &&
"Verification of generated function failed");
return true;


@@ -0,0 +1,59 @@
; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-model-phi-nodes -polly-codegen < %s | FileCheck %s
;
; void f(int *A, int c, int N) {
; int tmp;
; for (int i = 0; i < N; i++) {
; if (i > c)
; tmp = 3;
; else
; tmp = 5;
; A[i] = tmp;
; }
; }
;
; CHECK-LABEL: bb:
; CHECK: %tmp.0.phiops = alloca i32
; CHECK-LABEL: polly.stmt.bb8:
; CHECK: %tmp.0.phiops.reload = load i32, i32* %tmp.0.phiops
; CHECK: store i32 %tmp.0.phiops.reload, i32*
; CHECK-LABEL: polly.stmt.bb6:
; CHECK: store i32 3, i32* %tmp.0.phiops
; CHECK-LABEL: polly.stmt.bb7:
; CHECK: store i32 5, i32* %tmp.0.phiops
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @f(i32* %A, i32 %c, i32 %N) {
bb:
%tmp = sext i32 %N to i64
%tmp1 = sext i32 %c to i64
br label %bb2
bb2: ; preds = %bb10, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb10 ], [ 0, %bb ]
%tmp3 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp3, label %bb4, label %bb11
bb4: ; preds = %bb2
%tmp5 = icmp sgt i64 %indvars.iv, %tmp1
br i1 %tmp5, label %bb6, label %bb7
bb6: ; preds = %bb4
br label %bb8
bb7: ; preds = %bb4
br label %bb8
bb8: ; preds = %bb7, %bb6
%tmp.0 = phi i32 [ 3, %bb6 ], [ 5, %bb7 ]
%tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp.0, i32* %tmp9, align 4
br label %bb10
bb10: ; preds = %bb8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb2
bb11: ; preds = %bb2
ret void
}


@@ -0,0 +1,66 @@
; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-model-phi-nodes -disable-polly-intra-scop-scalar-to-array -polly-codegen < %s | FileCheck %s
;
; void f(int *A, int c, int N) {
; int tmp;
; for (int i = 0; i < N; i++) {
; if (i > c)
; tmp = 3;
; else
; tmp = 5;
; A[i] = tmp;
; }
; }
;
; CHECK-LABEL: bb:
; CHECK-DAG: %tmp.0.s2a = alloca i32
; CHECK-DAG: %tmp.0.phiops = alloca i32
; CHECK-LABEL: polly.stmt.bb8:
; CHECK: %tmp.0.phiops.reload = load i32, i32* %tmp.0.phiops
; CHECK: store i32 %tmp.0.phiops.reload, i32* %tmp.0.s2a
; CHECK-LABEL: polly.stmt.bb8b:
; CHECK: %tmp.0.s2a.reload = load i32, i32* %tmp.0.s2a
; CHECK: store i32 %tmp.0.s2a.reload,
; CHECK-LABEL: polly.stmt.bb6:
; CHECK: store i32 3, i32* %tmp.0.phiops
; CHECK-LABEL: polly.stmt.bb7:
; CHECK: store i32 5, i32* %tmp.0.phiops
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @f(i32* %A, i32 %c, i32 %N) {
bb:
%tmp = sext i32 %N to i64
%tmp1 = sext i32 %c to i64
br label %bb2
bb2: ; preds = %bb10, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb10 ], [ 0, %bb ]
%tmp3 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp3, label %bb4, label %bb11
bb4: ; preds = %bb2
%tmp5 = icmp sgt i64 %indvars.iv, %tmp1
br i1 %tmp5, label %bb6, label %bb7
bb6: ; preds = %bb4
br label %bb8
bb7: ; preds = %bb4
br label %bb8
bb8: ; preds = %bb7, %bb6
%tmp.0 = phi i32 [ 3, %bb6 ], [ 5, %bb7 ]
br label %bb8b
bb8b:
%tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp.0, i32* %tmp9, align 4
br label %bb10
bb10: ; preds = %bb8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb2
bb11: ; preds = %bb2
ret void
}


@@ -0,0 +1,72 @@
; RUN: opt %loadPolly -analyze -polly-ast -polly-no-early-exit -polly-detect-unprofitable -polly-model-phi-nodes < %s | FileCheck %s --check-prefix=AST
; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-model-phi-nodes -polly-codegen < %s | FileCheck %s
;
; void jd(int *A, int c) {
; for (int i = 0; i < 1024; i++) {
; if (c)
; A[i] = 1;
; else
; A[i] = 2;
; }
; }
; AST: for (int c0 = 0; c0 <= 1023; c0 += 1) {
; AST: if (c <= -1) {
; AST: Stmt_if_then(c0);
; AST: } else if (c >= 1) {
; AST: Stmt_if_then(c0);
; AST: } else
; AST: Stmt_if_else(c0);
; AST: Stmt_if_end(c0);
; AST: }
;
; CHECK-LABEL: entry:
; CHECK-NEXT: %phi.phiops = alloca i32
; CHECK-LABEL: polly.stmt.if.end:
; CHECK-NEXT: %phi.phiops.reload = load i32, i32* %phi.phiops
; CHECK-NEXT: %scevgep
; CHECK-NEXT: store i32 %phi.phiops.reload, i32*
; CHECK-LABEL: polly.stmt.if.then:
; CHECK-NEXT: store i32 1, i32* %phi.phiops
; CHECK-NEXT: br label %polly.merge{{[.]?}}
; CHECK-LABEL: polly.stmt.if.then{{.}}:
; CHECK-NEXT: store i32 1, i32* %phi.phiops
; CHECK-NEXT: br label %polly.merge{{[.]?}}
; CHECK-LABEL: polly.stmt.if.else:
; CHECK-NEXT: store i32 2, i32* %phi.phiops
; CHECK-NEXT: br label %polly.merge{{[.]?}}
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @jd(i32* %A, i32 %c) {
entry:
br label %for.cond
for.cond:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body:
%tobool = icmp eq i32 %c, 0
br i1 %tobool, label %if.else, label %if.then
if.then:
br label %if.end
if.else:
br label %if.end
if.end:
%phi = phi i32 [ 1, %if.then], [ 2, %if.else ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %phi, i32* %arrayidx, align 4
br label %for.inc
for.inc:
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end:
ret void
}


@@ -0,0 +1,67 @@
; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-model-phi-nodes -disable-polly-intra-scop-scalar-to-array -polly-codegen < %s | FileCheck %s
;
; float f(float *A, int N) {
; float tmp = 0;
; for (int i = 0; i < N; i++)
; tmp += A[i];
; }
;
; CHECK: bb:
; CHECK-NOT: %tmp7{{[.*]}} = alloca float
; CHECK-DAG: %tmp.0.s2a = alloca float
; CHECK-NOT: %tmp7{{[.*]}} = alloca float
; CHECK-DAG: %tmp.0.phiops = alloca float
; CHECK-NOT: %tmp7{{[.*]}} = alloca float
;
; CHECK: polly.merge_new_and_old:
; CHECK-NEXT: ret
;
; CHECK: polly.start:
; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops
; CHECK: polly.merge:
; CHECK-NEXT: br label %polly.merge_new_and_old
; CHECK: polly.stmt.bb1{{[0-9]*}}:
; CHECK-NEXT: %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops
; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
; CHECK: polly.stmt.bb1{{[0-9]*}}:
; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops
; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a
; CHECK: polly.stmt.bb4: ; preds = %polly.then3
; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @f(float* %A, i32 %N) {
bb:
%tmp = sext i32 %N to i64
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%tmp.0 = phi float [ 0.000000e+00, %bb ], [ %tmp7, %bb4 ]
%tmp2 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp2, label %bb3, label %bb8
bb3: ; preds = %bb1
br label %bb4
bb4: ; preds = %bb3
%tmp5 = getelementptr inbounds float, float* %A, i64 %indvars.iv
%tmp6 = load float, float* %tmp5, align 4
%tmp7 = fadd float %tmp.0, %tmp6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb8: ; preds = %bb1
br label %exit
exit:
ret void
}


@@ -0,0 +1,63 @@
; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-model-phi-nodes -disable-polly-intra-scop-scalar-to-array -polly-codegen < %s | FileCheck %s
;
; float f(float *A, int N) {
; float tmp = 0;
; for (int i = 0; i < N; i++)
; tmp += A[i];
; return tmp;
; }
;
; CHECK: polly.merge_new_and_old:
; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ]
; CHECK-NEXT: ret float %tmp.0.merge
;
; CHECK: polly.start:
; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops
; CHECK: polly.merge:
; CHECK-NEXT: %tmp.0.final_reload = load float, float* %tmp.0.s2a
; CHECK-NEXT: br label %polly.merge_new_and_old
; CHECK: polly.stmt.bb1{{[0-9]*}}:
; CHECK-NEXT: %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops
; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
; CHECK: polly.stmt.bb1{{[0-9]*}}:
; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops
; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a
; CHECK: polly.stmt.bb4: ; preds = %polly.then3
; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define float @f(float* %A, i32 %N) {
bb:
%tmp = sext i32 %N to i64
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%tmp.0 = phi float [ 0.000000e+00, %bb ], [ %tmp7, %bb4 ]
%tmp2 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp2, label %bb3, label %bb8
bb3: ; preds = %bb1
br label %bb4
bb4: ; preds = %bb3
%tmp5 = getelementptr inbounds float, float* %A, i64 %indvars.iv
%tmp6 = load float, float* %tmp5, align 4
%tmp7 = fadd float %tmp.0, %tmp6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb8: ; preds = %bb1
br label %exit
exit:
ret float %tmp.0
}

View File

@ -0,0 +1,93 @@
; RUN: opt %loadPolly -S -polly-detect-unprofitable -polly-model-phi-nodes -disable-polly-intra-scop-scalar-to-array -polly-no-early-exit -polly-codegen < %s | FileCheck %s
;
; int jd(int *restrict A, int x, int N) {
; for (int i = 1; i < N; i++)
; for (int j = 3; j < N; j++)
; x += A[i];
; return x;
; }
;
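; A rough C-level sketch of the demotion the interleaved CHECK lines below
; expect; function and variable names are illustrative only and follow the
; alloca names checked for. Each PHI (%x.addr.0, %x.addr.1 and the LCSSA PHI
; %x.addr.1.lcssa) gets a *.phiops slot for its incoming operands and a *.s2a
; slot for its value, and the escaping %x.addr.0 is finally reloaded from its
; *.s2a slot in polly.merge.
;
;    int jd_demoted(int *restrict A, int x, int N) {
;      int x0_phiops, x0_s2a, x1_phiops, x1_s2a;
;      int x1_lcssa_phiops, x1_lcssa_s2a;
;      x0_phiops = x;                   /* polly.start                       */
;      for (int i = 1; ; i++) {
;        x0_s2a = x0_phiops;            /* polly.stmt.for.cond               */
;        if (!(i < N))
;          break;
;        x1_phiops = x0_s2a;            /* polly.stmt.for.body               */
;        for (int j = 3; ; j++) {
;          x1_s2a = x1_phiops;          /* polly.stmt.for.cond1              */
;          x1_lcssa_phiops = x1_phiops; /* polly.stmt.for.cond1: exit operand */
;          if (!(j != N))
;            break;
;          x1_phiops = x1_s2a + A[i];   /* polly.stmt.for.inc                */
;        }
;        x1_lcssa_s2a = x1_lcssa_phiops; /* polly.stmt.for.end               */
;        x0_phiops = x1_lcssa_s2a;       /* polly.stmt.for.inc4              */
;      }
;      return x0_s2a;                    /* polly.merge: final reload        */
;    }
;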
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i32 @jd(i32* noalias %A, i32 %x, i32 %N) {
entry:
; CHECK-LABEL: entry:
; CHECK-DAG: %x.addr.1.lcssa.s2a = alloca i32
; CHECK-DAG: %x.addr.1.lcssa.phiops = alloca i32
; CHECK-DAG: %x.addr.1.s2a = alloca i32
; CHECK-DAG: %x.addr.1.phiops = alloca i32
; CHECK-DAG: %x.addr.0.s2a = alloca i32
; CHECK-DAG: %x.addr.0.phiops = alloca i32
%tmp = sext i32 %N to i64
br label %for.cond
; CHECK-LABEL: polly.merge_new_and_old:
; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ]
; CHECK: ret i32 %x.addr.0.merge
; CHECK-LABEL: polly.start:
; CHECK-NEXT: store i32 %x, i32* %x.addr.0.phiops
; CHECK-LABEL: polly.merge:
; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a
for.cond: ; preds = %for.inc4, %entry
; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}:
; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops
; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc4 ], [ 1, %entry ]
%x.addr.0 = phi i32 [ %x, %entry ], [ %x.addr.1.lcssa, %for.inc4 ]
%cmp = icmp slt i64 %indvars.iv, %tmp
br i1 %cmp, label %for.body, label %for.end6
; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}:
; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops
; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a
for.body: ; preds = %for.cond
; CHECK-LABEL: polly.stmt.for.body:
; CHECK: %x.addr.0.s2a.reload[[R2:[0-9]*]] = load i32, i32* %x.addr.0.s2a
; CHECK: store i32 %x.addr.0.s2a.reload[[R2]], i32* %x.addr.1.phiops
br label %for.cond1
for.end: ; preds = %for.cond1
; CHECK-LABEL: polly.stmt.for.end:
; CHECK-NEXT: %x.addr.1.lcssa.phiops.reload = load i32, i32* %x.addr.1.lcssa.phiops
; CHECK-NEXT: store i32 %x.addr.1.lcssa.phiops.reload, i32* %x.addr.1.lcssa.s2a[[R4:[0-9]*]]
%x.addr.1.lcssa = phi i32 [ %x.addr.1, %for.cond1 ]
br label %for.inc4
for.inc4: ; preds = %for.end
; CHECK-LABEL: polly.stmt.for.inc4:
; CHECK: %x.addr.1.lcssa.s2a.reload[[R5:[0-9]*]] = load i32, i32* %x.addr.1.lcssa.s2a[[R4]]
; CHECK: store i32 %x.addr.1.lcssa.s2a.reload[[R5]], i32* %x.addr.0.phiops
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.cond1: ; preds = %for.inc, %for.body
; CHECK-LABEL: polly.stmt.for.cond1:
; CHECK: %x.addr.1.phiops.reload = load i32, i32* %x.addr.1.phiops
; CHECK: store i32 %x.addr.1.phiops.reload, i32* %x.addr.1.s2a[[R6:[0-9]*]]
; CHECK: store i32 %x.addr.1.phiops.reload, i32* %x.addr.1.lcssa.phiops
%x.addr.1 = phi i32 [ %x.addr.0, %for.body ], [ %add, %for.inc ]
%j.0 = phi i32 [ 3, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, %N
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
br label %for.inc
for.inc: ; preds = %for.body3
; CHECK-LABEL: polly.stmt.for.inc:
; CHECK: %x.addr.1.s2a.reload[[R3:[0-9]*]] = load i32, i32* %x.addr.1.s2a
; CHECK: %p_add = add nsw i32 %x.addr.1.s2a.reload[[R3]], %tmp1_p_scalar_
; CHECK: store i32 %p_add, i32* %x.addr.1.phiops
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp1 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %x.addr.1, %tmp1
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end6: ; preds = %for.cond
ret i32 %x.addr.0
}

View File

@ -0,0 +1,108 @@
; RUN: opt %loadPolly -S -polly-detect-unprofitable -polly-model-phi-nodes -disable-polly-intra-scop-scalar-to-array -polly-no-early-exit -polly-codegen < %s | FileCheck %s
;
; int jd(int *restrict A, int x, int N, int c) {
; for (int i = 0; i < N; i++)
; for (int j = 0; j < N; j++)
; if (i < c)
; x += A[i];
; return x;
; }
;
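; A rough C-level sketch of the demotion the interleaved CHECK lines below
; expect; function and variable names are illustrative only and follow the
; alloca names checked for. The three PHIs %x.addr.0, %x.addr.1 and %x.addr.2
; each get a *.phiops slot for their incoming operands and a *.s2a slot for
; their value, the conditional increment only rewrites x.addr.2.phiops, and
; the escaping %x.addr.0 is reloaded from its *.s2a slot in polly.merge.
;
;    int jd_demoted(int *restrict A, int x, int N, int c) {
;      int x0_phiops, x0_s2a, x1_phiops, x1_s2a, x2_phiops, x2_s2a;
;      x0_phiops = x;                 /* polly.start                */
;      for (int i = 0; ; i++) {
;        x0_s2a = x0_phiops;          /* polly.stmt.for.cond        */
;        if (!(i < N))
;          break;
;        x1_phiops = x0_s2a;          /* polly.stmt.for.body        */
;        for (int j = 0; ; j++) {
;          x1_s2a = x1_phiops;        /* polly.stmt.for.cond1       */
;          if (!(j != N))
;            break;
;          x2_phiops = x1_s2a;        /* polly.stmt.for.body3       */
;          if (i < c)
;            x2_phiops = x1_s2a + A[i]; /* polly.stmt.if.then       */
;          x2_s2a = x2_phiops;        /* polly.stmt.if.end          */
;          x1_phiops = x2_s2a;        /* polly.stmt.for.inc         */
;        }
;        x0_phiops = x1_s2a;          /* polly.stmt.for.inc5        */
;      }
;      return x0_s2a;                 /* polly.merge: final reload  */
;    }
;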
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i32 @jd(i32* noalias %A, i32 %x, i32 %N, i32 %c) {
entry:
; CHECK-LABEL: entry:
; CHECK-DAG: %x.addr.2.s2a = alloca i32
; CHECK-DAG: %x.addr.2.phiops = alloca i32
; CHECK-DAG: %x.addr.1.s2a = alloca i32
; CHECK-DAG: %x.addr.1.phiops = alloca i32
; CHECK-DAG: %x.addr.0.s2a = alloca i32
; CHECK-DAG: %x.addr.0.phiops = alloca i32
%tmp = sext i32 %N to i64
%tmp1 = sext i32 %c to i64
br label %for.cond
; CHECK-LABEL: polly.merge_new_and_old:
; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ]
; CHECK: ret i32 %x.addr.0.merge
; CHECK-LABEL: polly.start:
; CHECK-NEXT: store i32 %x, i32* %x.addr.0.phiops
; CHECK-LABEL: polly.merge:
; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a
for.cond: ; preds = %for.inc5, %entry
; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}:
; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops
; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc5 ], [ 0, %entry ]
%x.addr.0 = phi i32 [ %x, %entry ], [ %x.addr.1, %for.inc5 ]
%cmp = icmp slt i64 %indvars.iv, %tmp
br i1 %cmp, label %for.body, label %for.end7
; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}:
; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops
; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a
for.body: ; preds = %for.cond
; CHECK-LABEL: polly.stmt.for.body:
; CHECK: %x.addr.0.s2a.reload[[R2:[0-9]*]] = load i32, i32* %x.addr.0.s2a
; CHECK: store i32 %x.addr.0.s2a.reload[[R2]], i32* %x.addr.1.phiops
br label %for.cond1
for.inc5: ; preds = %for.end
; CHECK-LABEL: polly.stmt.for.inc5:
; CHECK: %x.addr.1.s2a.reload[[R5:[0-9]*]] = load i32, i32* %x.addr.1.s2a
; CHECK: store i32 %x.addr.1.s2a.reload[[R5]], i32* %x.addr.0.phiops
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.cond1: ; preds = %for.inc, %for.body
; CHECK-LABEL: polly.stmt.for.cond1:
; CHECK: %x.addr.1.phiops.reload = load i32, i32* %x.addr.1.phiops
; CHECK: store i32 %x.addr.1.phiops.reload, i32* %x.addr.1.s2a
%x.addr.1 = phi i32 [ %x.addr.0, %for.body ], [ %x.addr.2, %for.inc ]
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, %N
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
; CHECK-LABEL: polly.stmt.for.body3:
; CHECK: %x.addr.1.s2a.reload = load i32, i32* %x.addr.1.s2a
; CHECK: store i32 %x.addr.1.s2a.reload, i32* %x.addr.2.phiops
%cmp4 = icmp slt i64 %indvars.iv, %tmp1
br i1 %cmp4, label %if.then, label %if.end
if.end: ; preds = %if.then, %for.body3
; CHECK-LABEL: polly.stmt.if.end:
; CHECK: %x.addr.2.phiops.reload = load i32, i32* %x.addr.2.phiops
; CHECK: store i32 %x.addr.2.phiops.reload, i32* %x.addr.2.s2a
%x.addr.2 = phi i32 [ %add, %if.then ], [ %x.addr.1, %for.body3 ]
br label %for.inc
for.inc: ; preds = %if.end
; CHECK-LABEL: polly.stmt.for.inc:
; CHECK: %x.addr.2.s2a.reload[[R3:[0-9]*]] = load i32, i32* %x.addr.2.s2a
; CHECK: store i32 %x.addr.2.s2a.reload[[R3]], i32* %x.addr.1.phiops
%inc = add nsw i32 %j.0, 1
br label %for.cond1
if.then: ; preds = %for.body3
; CHECK-LABEL: polly.stmt.if.then:
; CHECK: %x.addr.1.s2a.reload[[R5:[0-9]*]] = load i32, i32* %x.addr.1.s2a
; CHECK: %p_add = add nsw i32 %x.addr.1.s2a.reload[[R5]], %tmp2_p_scalar_
; CHECK: store i32 %p_add, i32* %x.addr.2.phiops
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp2 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %x.addr.1, %tmp2
br label %if.end
for.end: ; preds = %for.cond1
br label %for.inc5
for.end7: ; preds = %for.cond
ret i32 %x.addr.0
}