Allow invariant loads in the SCoP description

This patch allows invariant loads to be used in the SCoP description,
  e.g., as loop bounds, conditions or in memory access functions.

  First we collect "required invariant loads" during SCoP detection that
  would otherwise make an expression we care about non-affine. To this
  end a new level of abstraction was introduced before
  SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and
  ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide
  if we want a load inside the region to be optimistically assumed
  invariant or not. If we do, it will be marked as required and in the
  SCoP generation we bail if it is actually not invariant. If we don't
  it will be a non-affine expression as before. At the moment we
  optimistically assume all "hoistable" (namely non-loop-carried) loads
to be invariant. This causes us to expand some SCoPs and dismiss them
later, but it also allows us to detect many SCoPs we would dismiss
directly if we instead asked, e.g., AliasAnalysis::canBasicBlockModify(). We also
  allow potential aliases between optimistically assumed invariant loads
  and other pointers as our runtime alias checks are sound in case the
  loads are actually invariant. Together with the invariant checks this
combination allows us to handle a lot more than LICM can.

  The code generation of the invariant loads had to be extended as we
  can now have dependences between parameters and invariant (hoisted)
  loads as well as the other way around, e.g.,
    test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll
  First, it is important to note that we cannot have real cycles but
  only dependences from a hoisted load to a parameter and from another
  parameter to that hoisted load (and so on). To handle such cases we
materialize llvm::Values on demand for parameters that are referred to
by a hoisted load, and then materialize the remaining parameters. Second,
  there are new kinds of dependences between hoisted loads caused by the
  constraints on their execution. If a hoisted load is conditionally
  executed it might depend on the value of another hoisted load. To deal
with such situations we already sort them in the ScopInfo such that
  they can be generated in the order they are listed in the
  Scop::InvariantAccesses list (see compareInvariantAccesses). The
  dependences between hoisted loads caused by indirect accesses are
  handled the same way as before.

llvm-svn: 249607
This commit is contained in:
Johannes Doerfert 2015-10-07 20:17:36 +00:00
parent 521dd5842f
commit 09e3697f44
40 changed files with 1395 additions and 158 deletions

View File

@ -203,6 +203,10 @@ protected:
virtual void createMark(__isl_take isl_ast_node *Marker);
virtual void createFor(__isl_take isl_ast_node *For);
/// @brief Preload the memory access at @p AccessRange with @p Build.
Value *preloadUnconditionally(__isl_take isl_set *AccessRange,
isl_ast_build *Build);
/// @brief Preload the memory load access @p MA.
///
/// If @p MA is not always executed it will be conditionally loaded and

View File

@ -48,6 +48,7 @@
#define POLLY_SCOP_DETECTION_H
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@ -146,6 +147,9 @@ private:
using BoxedLoopsMapTy = DenseMap<const Region *, BoxedLoopsSetTy>;
BoxedLoopsMapTy BoxedLoopsMap;
/// @brief Map to remember loads that are required to be invariant.
DenseMap<const Region *, InvariantLoadsSetTy> RequiredInvariantLoadsMap;
/// @brief Context variables for SCoP detection.
struct DetectionContext {
Region &CurRegion; // The region to check.
@ -178,11 +182,15 @@ private:
/// @brief The set of loops contained in non-affine regions.
BoxedLoopsSetTy &BoxedLoopsSet;
/// @brief Loads that need to be invariant during execution.
InvariantLoadsSetTy &RequiredILS;
DetectionContext(Region &R, AliasAnalysis &AA,
NonAffineSubRegionSetTy &NASRS, BoxedLoopsSetTy &BLS,
bool Verify)
InvariantLoadsSetTy &RequiredILS, bool Verify)
: CurRegion(R), AST(AA), Verifying(Verify), Log(&R), hasLoads(false),
hasStores(false), NonAffineSubRegionSet(NASRS), BoxedLoopsSet(BLS) {}
hasStores(false), NonAffineSubRegionSet(NASRS), BoxedLoopsSet(BLS),
RequiredILS(RequiredILS) {}
};
// Remember the valid regions
@ -241,6 +249,18 @@ private:
/// @return True if the call instruction is valid, false otherwise.
static bool isValidCallInst(CallInst &CI);
/// @brief Check if the given loads could be invariant and can be hoisted.
///
/// If true is returned the loads are added to the required invariant loads
/// contained in the @p Context.
///
/// @param RequiredILS The loads to check.
/// @param Context The current detection context.
///
/// @return True if all loads can be assumed invariant.
bool onlyValidRequiredInvariantLoads(InvariantLoadsSetTy &RequiredILS,
DetectionContext &Context) const;
/// @brief Check if a value is invariant in the region Reg.
///
/// @param Val Value to check for invariance.
@ -300,6 +320,18 @@ private:
bool isValidBranch(BasicBlock &BB, BranchInst *BI, Value *Condition,
bool IsLoopBranch, DetectionContext &Context) const;
/// @brief Check if the SCEV @p S is affine in the current @p Context.
///
/// This will also use a heuristic to decide if we want to require loads to be
/// invariant to make the expression affine or if we want to treat it as
/// non-affine.
///
/// @param S The expression to be checked.
/// @param Context The context of scop detection.
/// @param BaseAddress The base address of the expression @p S (if any).
bool isAffine(const SCEV *S, DetectionContext &Context,
Value *BaseAddress = nullptr) const;
/// @brief Check if the control flow in a basic block is valid.
///
/// @param BB The BB to check the control flow.
@ -369,6 +401,9 @@ public:
/// @brief Return the set of loops in non-affine subregions for @p R.
const BoxedLoopsSetTy *getBoxedLoops(const Region *R) const;
/// @brief Return the set of required invariant loads for @p R.
const InvariantLoadsSetTy *getRequiredInvariantLoads(const Region *R) const;
/// @brief Return true if @p SubR is a non-affine subregion in @p ScopR.
bool isNonAffineSubRegion(const Region *SubR, const Region *ScopR) const;

View File

@ -1193,7 +1193,23 @@ private:
/// @see isIgnored()
void simplifySCoP(bool RemoveIgnoredStmts);
/// @brief Hoist all invariant memory loads.
/// @brief Hoist invariant memory loads and check for required ones.
///
/// We first identify "common" invariant loads, thus loads that are invariant
/// and can be hoisted. Then we check if all required invariant loads have
/// been identified as (common) invariant. A load is a required invariant load
/// if it was assumed to be invariant during SCoP detection, e.g., to assume
/// loop bounds to be affine or runtime alias checks to be placeable. In case
/// a required invariant load was not identified as (common) invariant we will
/// drop this SCoP. An example for both "common" as well as required invariant
/// loads is given below:
///
/// for (int i = 1; i < *LB[0]; i++)
/// for (int j = 1; j < *LB[1]; j++)
/// A[i][j] += A[0][0] + (*V);
///
/// Common inv. loads: V, A[0][0], LB[0], LB[1]
/// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
void hoistInvariantLoads();
/// @brief Build the Context of the Scop.
@ -1265,6 +1281,7 @@ public:
//@}
ScalarEvolution *getSE() const;
ScopDetection &getSD() const { return SD; }
/// @brief Get the count of parameters used in this Scop.
///
@ -1596,8 +1613,10 @@ class ScopInfo : public RegionPass {
/// @param L The parent loop of the instruction
/// @param R The region on which to build the data access dictionary.
/// @param BoxedLoops The set of loops that are overapproximated in @p R.
/// @param ScopRIL The required invariant loads equivalence classes.
void buildMemoryAccess(Instruction *Inst, Loop *L, Region *R,
const ScopDetection::BoxedLoopsSetTy *BoxedLoops);
const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
const InvariantLoadsSetTy &ScopRIL);
/// @brief Analyze and extract the cross-BB scalar dependences (or,
/// dataflow dependencies) of an instruction.

View File

@ -12,6 +12,7 @@
#ifndef POLLY_SCEV_VALIDATOR_H
#define POLLY_SCEV_VALIDATOR_H
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/SetVector.h"
#include <vector>
@ -21,6 +22,7 @@ class SCEV;
class ScalarEvolution;
class Value;
class Loop;
class LoadInst;
}
namespace polly {
@ -45,8 +47,8 @@ void findValues(const llvm::SCEV *Expr, llvm::SetVector<llvm::Value *> &Values);
/// @param R The region in which we look for dependences.
bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R);
bool isAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
llvm::ScalarEvolution &SE,
const llvm::Value *BaseAddress = 0);
llvm::ScalarEvolution &SE, const llvm::Value *BaseAddress = 0,
InvariantLoadsSetTy *ILS = nullptr);
std::vector<const llvm::SCEV *>
getParamsInAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
llvm::ScalarEvolution &SE,

View File

@ -15,12 +15,13 @@
#define POLLY_SUPPORT_IRHELPER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
class Type;
class Instruction;
class LoadInst;
class LoopInfo;
class Loop;
class ScalarEvolution;
@ -45,6 +46,9 @@ class Scop;
using ValueMapT = llvm::DenseMap<llvm::AssertingVH<llvm::Value>,
llvm::AssertingVH<llvm::Value>>;
/// @brief Type for a set of invariant loads.
using InvariantLoadsSetTy = llvm::SetVector<llvm::AssertingVH<llvm::LoadInst>>;
/// Temporary Hack for extended regiontree.
///
/// @brief Cast the region to loop.
@ -105,11 +109,11 @@ void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock, llvm::Pass *P);
/// @param E The expression for which code is actually generated.
/// @param Ty The type of the resulting code.
/// @param IP The insertion point for the new code.
llvm::Value *expandCodeFor(
Scop &S, llvm::ScalarEvolution &SE, const llvm::DataLayout &DL,
const char *Name, const llvm::SCEV *E, llvm::Type *Ty,
llvm::Instruction *IP,
llvm::DenseMap<const llvm::Value *, llvm::Value *> *VMap = nullptr);
/// @param VMap A remaping of values used in @p E.
llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE,
const llvm::DataLayout &DL, const char *Name,
const llvm::SCEV *E, llvm::Type *Ty,
llvm::Instruction *IP, ValueMapT *VMap = nullptr);
/// @brief Check if the block is a error block.
///
@ -133,5 +137,16 @@ bool isErrorBlock(llvm::BasicBlock &BB);
///
/// @return The condition of @p TI and nullptr if none could be extracted.
llvm::Value *getConditionFromTerminator(llvm::TerminatorInst *TI);
/// @brief Check if @p LInst can be hoisted in @p R.
///
/// @param LInst The load to check.
/// @param R The analyzed region.
/// @param LI The loop info.
/// @param SE The scalar evolution analysis.
///
/// @return True if @p LInst can be hoisted in @p R.
bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI,
llvm::ScalarEvolution &SE);
}
#endif

View File

@ -51,7 +51,6 @@
#include "polly/ScopDetection.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@ -258,9 +257,10 @@ bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
if (Verify) {
BoxedLoopsSetTy DummyBoxedLoopsSet;
NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
InvariantLoadsSetTy DummyILS;
DetectionContext Context(const_cast<Region &>(R), *AA,
DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
false /*verifying*/);
DummyILS, false /*verifying*/);
return isValidRegion(Context);
}
@ -302,15 +302,39 @@ bool ScopDetection::addOverApproximatedRegion(Region *AR,
return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
// Check whether all loads in @p RequiredILS can legally be assumed invariant
// in the region of @p Context; if (and only if) so, commit them to the
// context's set of required invariant loads.
bool ScopDetection::onlyValidRequiredInvariantLoads(
InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
// A single non-hoistable load invalidates the whole candidate set; nothing
// is recorded in that case.
for (LoadInst *Load : RequiredILS)
if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
return false;
// All candidates are hoistable. Remember them so SCoP generation can later
// verify they are actually invariant (and drop the SCoP otherwise).
Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
return true;
}
// Check if @p S is affine in @p Context, optimistically assuming hoistable
// loads referenced by @p S to be invariant. Loads assumed invariant here
// become "required" invariant loads recorded in the detection context.
bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context,
Value *BaseAddress) const {
// Loads the SCEV validator had to treat as parameters for S to be affine.
InvariantLoadsSetTy AccessILS;
if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS))
return false;
// Only accept the expression if all assumed-invariant loads are in fact
// hoistable; this also records them in Context.RequiredILS.
if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
return false;
return true;
}
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
Value *Condition, bool IsLoopBranch,
DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
Loop *L = LI->getLoopFor(&BB);
const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
if (isAffineExpr(&CurRegion, ConditionSCEV, *SE))
if (isAffine(ConditionSCEV, Context))
return true;
if (!IsLoopBranch && AllowNonAffineSubRegions &&
@ -327,8 +351,6 @@ bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
Value *Condition, bool IsLoopBranch,
DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
// Non constant conditions of branches need to be ICmpInst.
if (!isa<ICmpInst>(Condition)) {
if (!IsLoopBranch && AllowNonAffineSubRegions &&
@ -361,7 +383,7 @@ bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);
if (isAffineExpr(&CurRegion, LHS, *SE) && isAffineExpr(&CurRegion, RHS, *SE))
if (isAffine(LHS, Context) && isAffine(RHS, Context))
return true;
if (!IsLoopBranch && AllowNonAffineSubRegions &&
@ -452,18 +474,6 @@ bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
if (!isInvariant(*Operand, Reg))
return false;
// When the instruction is a load instruction, check that no write to memory
// in the region aliases with the load.
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
auto Loc = MemoryLocation::get(LI);
// Check if any basic block in the region can modify the location pointed to
// by 'Loc'. If so, 'Val' is (likely) not invariant in the region.
for (const BasicBlock *BB : Reg.blocks())
if (AA->canBasicBlockModify(*BB, Loc))
return false;
}
return true;
}
@ -547,7 +557,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
const Instruction *Insn = Pair.first;
const SCEV *AF = Pair.second;
if (!isAffineExpr(&CurRegion, AF, *SE, BaseValue)) {
if (!isAffine(AF, Context, BaseValue)) {
invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
BaseValue);
if (!KeepGoing)
@ -574,7 +584,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
if (!AF) {
if (isAffineExpr(&CurRegion, Pair.second, *SE, BaseValue))
if (isAffine(Pair.second, Context, BaseValue))
Acc->DelinearizedSubscripts.push_back(Pair.second);
else
IsNonAffine = true;
@ -584,7 +594,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
if (Acc->DelinearizedSubscripts.size() == 0)
IsNonAffine = true;
for (const SCEV *S : Acc->DelinearizedSubscripts)
if (!isAffineExpr(&CurRegion, S, *SE, BaseValue))
if (!isAffine(S, Context, BaseValue))
IsNonAffine = true;
}
@ -655,11 +665,11 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
if (PollyDelinearize && !isVariantInNonAffineLoop) {
Context.Accesses[BasePointer].push_back({&Inst, AccessFunction});
if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
if (!isAffine(AccessFunction, Context, BaseValue))
Context.NonAffineAccesses.insert(BasePointer);
} else if (!AllowNonAffine) {
if (isVariantInNonAffineLoop ||
!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
!isAffine(AccessFunction, Context, BaseValue))
return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true,
AccessFunction, &Inst, BaseValue);
}
@ -693,9 +703,16 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
// the beginning of the SCoP. This breaks if the base pointer is defined
// inside the scop. Hence, we can only create a run-time check if we are
// sure the base pointer is not an instruction defined inside the scop.
// However, we can ignore loads that will be hoisted.
for (const auto &Ptr : AS) {
Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
if (Inst && CurRegion.contains(Inst)) {
auto *Load = dyn_cast<LoadInst>(Inst);
if (Load && isHoistableLoad(Load, CurRegion, *LI, *SE)) {
Context.RequiredILS.insert(Load);
continue;
}
CanBuildRunTimeCheck = false;
break;
}
@ -815,7 +832,8 @@ Region *ScopDetection::expandRegion(Region &R) {
while (ExpandedRegion) {
DetectionContext Context(
*ExpandedRegion, *AA, NonAffineSubRegionMap[ExpandedRegion.get()],
BoxedLoopsMap[ExpandedRegion.get()], false /* verifying */);
BoxedLoopsMap[ExpandedRegion.get()],
RequiredInvariantLoadsMap[ExpandedRegion.get()], false /* verifying */);
DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
// Only expand when we did not collect errors.
@ -877,11 +895,12 @@ void ScopDetection::removeCachedResults(const Region &R) {
ValidRegions.remove(&R);
BoxedLoopsMap.erase(&R);
NonAffineSubRegionMap.erase(&R);
RequiredInvariantLoadsMap.erase(&R);
}
void ScopDetection::findScops(Region &R) {
DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], BoxedLoopsMap[&R],
false /*verifying*/);
RequiredInvariantLoadsMap[&R], false /*verifying*/);
bool RegionIsValid = false;
if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) {
@ -1121,14 +1140,23 @@ ScopDetection::getBoxedLoops(const Region *R) const {
return &BLMIt->second;
}
// Return the loads that were required to be invariant when region @p R was
// detected, or nullptr if no such set was recorded for @p R.
const InvariantLoadsSetTy *
ScopDetection::getRequiredInvariantLoads(const Region *R) const {
auto I = RequiredInvariantLoadsMap.find(R);
if (I == RequiredInvariantLoadsMap.end())
return nullptr;
return &I->second;
}
void polly::ScopDetection::verifyRegion(const Region &R) const {
assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
BoxedLoopsSetTy DummyBoxedLoopsSet;
NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
InvariantLoadsSetTy DummyILS;
DetectionContext Context(const_cast<Region &>(R), *AA,
DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
true /*verifying*/);
DummyILS, true /*verifying*/);
isValidRegion(Context);
}
@ -1162,6 +1190,7 @@ void ScopDetection::releaseMemory() {
InsnToMemAcc.clear();
BoxedLoopsMap.clear();
NonAffineSubRegionMap.clear();
RequiredInvariantLoadsMap.clear();
// Do not clear the invalid function set.
}

View File

@ -1066,6 +1066,10 @@ void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *GEP) {
isl_local_space *LSpace = isl_local_space_from_space(getDomainSpace());
Type *Ty = GEP->getPointerOperandType();
ScalarEvolution &SE = *Parent.getSE();
ScopDetection &SD = Parent.getSD();
// The set of loads that are required to be invariant.
auto &ScopRIL = *SD.getRequiredInvariantLoads(&Parent.getRegion());
std::vector<const SCEV *> Subscripts;
std::vector<int> Sizes;
@ -1084,7 +1088,16 @@ void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *GEP) {
auto Expr = Subscripts[i + IndexOffset];
auto Size = Sizes[i];
if (!isAffineExpr(&Parent.getRegion(), Expr, SE))
InvariantLoadsSetTy AccessILS;
if (!isAffineExpr(&Parent.getRegion(), Expr, SE, nullptr, &AccessILS))
continue;
bool NonAffine = false;
for (LoadInst *LInst : AccessILS)
if (!ScopRIL.count(LInst))
NonAffine = true;
if (NonAffine)
continue;
isl_pw_aff *AccessOffset = getPwAff(Expr);
@ -2398,7 +2411,9 @@ void Scop::hoistInvariantLoads() {
// TODO: Loads that are not loop carried, hence are in a statement with
// zero iterators, are by construction invariant, though we
// currently "hoist" them anyway.
// currently "hoist" them anyway. This is necessary because we allow
// them to be treated as parameters (e.g., in conditions) and our code
// generation would otherwise use the old value.
BasicBlock *BB = Stmt.isBlockStmt() ? Stmt.getBasicBlock()
: Stmt.getRegion()->getEntry();
@ -2452,6 +2467,76 @@ void Scop::hoistInvariantLoads() {
if (!InvariantAccesses.empty())
IsOptimized = true;
// Check required invariant loads that were tagged during SCoP detection.
for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) {
assert(LI && getRegion().contains(LI));
ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) {
DEBUG(dbgs() << "\n\nWARNING: Load (" << *LI
<< ") is required to be invariant but was not marked as "
"such. SCoP for "
<< getRegion() << " will be dropped\n\n");
addAssumption(isl_set_empty(getParamSpace()));
return;
}
}
// We want invariant accesses to be sorted in a "natural order" because there
// might be dependences between invariant loads. These can be caused by
// indirect loads but also because an invariant load is only conditionally
// executed and the condition is dependent on another invariant load. As we
// want to do code generation in a straightforward way, e.g., preload the
// accesses in the list one after another, we sort them such that the
// preloaded values needed in the conditions will always be in front. Before
// we already ordered the accesses such that indirect loads can be resolved,
// thus we use a stable sort here.
// Comparator for the stable sort below: IA0 orders before IA1 iff IA1's
// execution context (domain) involves the parameter corresponding to IA0's
// loaded value, i.e., IA0 must be preloaded before IA1 can be evaluated.
auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0,
const InvariantAccessTy &IA1) {
Instruction *AI0 = IA0.first->getAccessInstruction();
Instruction *AI1 = IA1.first->getAccessInstruction();
// Only SCEV-able loaded values can act as parameters; others get a null
// SCEV and consequently no parameter id below.
const SCEV *S0 =
SE->isSCEVable(AI0->getType()) ? SE->getSCEV(AI0) : nullptr;
const SCEV *S1 =
SE->isSCEVable(AI1->getType()) ? SE->getSCEV(AI1) : nullptr;
isl_id *Id0 = getIdForParam(S0);
isl_id *Id1 = getIdForParam(S1);
// A load without a parameter id cannot be referenced by any execution
// context, so parameter loads sort before non-parameter loads.
if (Id0 && !Id1) {
isl_id_free(Id0);
isl_id_free(Id1);
return true;
}
if (!Id0) {
isl_id_free(Id0);
isl_id_free(Id1);
return false;
}
assert(Id0 && Id1);
// Execution domains of the two invariant accesses.
isl_set *Dom0 = IA0.second;
isl_set *Dom1 = IA1.second;
// NOTE(review): Dim1 is looked up in Dom0 (not Dom1); this appears to rely
// on the parameter spaces of both domains being aligned -- confirm.
int Dim0 = isl_set_find_dim_by_id(Dom0, isl_dim_param, Id0);
int Dim1 = isl_set_find_dim_by_id(Dom0, isl_dim_param, Id1);
bool Involves0Id1 = isl_set_involves_dims(Dom0, isl_dim_param, Dim1, 1);
bool Involves1Id0 = isl_set_involves_dims(Dom1, isl_dim_param, Dim0, 1);
// Real cycles between the execution contexts are impossible by
// construction (see the comment block above this lambda).
assert(!(Involves0Id1 && Involves1Id0));
isl_id_free(Id0);
isl_id_free(Id1);
// IA0 precedes IA1 exactly when IA1's domain references IA0's parameter.
return Involves1Id0;
};
std::stable_sort(InvariantAccesses.begin(), InvariantAccesses.end(),
compareInvariantAccesses);
}
const ScopArrayInfo *
@ -3091,7 +3176,8 @@ extern MapInsnToMemAcc InsnToMemAcc;
void ScopInfo::buildMemoryAccess(
Instruction *Inst, Loop *L, Region *R,
const ScopDetection::BoxedLoopsSetTy *BoxedLoops) {
const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
const InvariantLoadsSetTy &ScopRIL) {
unsigned Size;
Type *SizeType;
Value *Val;
@ -3138,11 +3224,18 @@ void ScopInfo::buildMemoryAccess(
std::vector<const SCEV *> SizesSCEV;
bool AllAffineSubcripts = true;
for (auto Subscript : Subscripts)
if (!isAffineExpr(R, Subscript, *SE)) {
AllAffineSubcripts = false;
for (auto Subscript : Subscripts) {
InvariantLoadsSetTy AccessILS;
AllAffineSubcripts =
isAffineExpr(R, Subscript, *SE, nullptr, &AccessILS);
for (LoadInst *LInst : AccessILS)
if (!ScopRIL.count(LInst))
AllAffineSubcripts = false;
if (!AllAffineSubcripts)
break;
}
}
if (AllAffineSubcripts && Sizes.size() > 0) {
for (auto V : Sizes)
@ -3176,8 +3269,14 @@ void ScopInfo::buildMemoryAccess(
isVariantInNonAffineLoop = true;
}
bool IsAffine = !isVariantInNonAffineLoop &&
isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue());
InvariantLoadsSetTy AccessILS;
bool IsAffine =
!isVariantInNonAffineLoop &&
isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue(), &AccessILS);
for (LoadInst *LInst : AccessILS)
if (!ScopRIL.count(LInst))
IsAffine = false;
// FIXME: Size of the number of bytes of an array element, not the number of
// elements as probably intended here.
@ -3230,6 +3329,9 @@ void ScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB,
// The set of loops contained in non-affine subregions that are part of R.
const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R);
// The set of loads that are required to be invariant.
auto &ScopRIL = *SD->getRequiredInvariantLoads(&R);
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) {
Instruction *Inst = I;
@ -3241,12 +3343,21 @@ void ScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB,
if (!PHI && IsExitBlock)
break;
// TODO: At this point we only know that elements of ScopRIL have to be
// invariant and will be hoisted for the SCoP to be processed. Though,
// there might be other invariant accesses that will be hoisted and
// that would allow to make a non-affine access affine.
if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
buildMemoryAccess(Inst, L, &R, BoxedLoops);
buildMemoryAccess(Inst, L, &R, BoxedLoops, ScopRIL);
if (isIgnoredIntrinsic(Inst))
continue;
// Do not build scalar dependences for required invariant loads as we will
// hoist them later on anyway or drop the SCoP if we cannot.
if (ScopRIL.count(dyn_cast<LoadInst>(Inst)))
continue;
if (buildScalarDependences(Inst, &R, NonAffineSubRegion)) {
if (!isa<StoreInst>(Inst))
addScalarWriteAccess(Inst);

View File

@ -107,7 +107,7 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
if (!isa<SCEVCouldNotCompute>(Scev)) {
const SCEV *NewScev = apply(Scev, LTS, SE);
llvm::ValueToValueMap VTV;
ValueMapT VTV;
VTV.insert(BBMap.begin(), BBMap.end());
VTV.insert(GlobalMap.begin(), GlobalMap.end());
@ -728,9 +728,7 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(
Value *VectorBlockGenerator::generateUnknownStrideLoad(
ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
__isl_keep isl_id_to_ast_expr *NewAccesses
) {
__isl_keep isl_id_to_ast_expr *NewAccesses) {
int VectorWidth = getVectorWidth();
auto *Pointer = Load->getPointerOperand();
VectorType *VectorType = VectorType::get(

View File

@ -142,9 +142,15 @@ public:
BasicBlock *StartBlock =
executeScopConditionally(S, this, Builder.getTrue());
auto SplitBlock = StartBlock->getSinglePredecessor();
// First generate code for the hoisted invariant loads and transitively the
// parameters they reference. Afterwards, generate code for the remaining
// parameters, which might reference the hoisted loads. Finally, build the
// runtime check that might reference both hoisted loads as well as parameters.
Builder.SetInsertPoint(SplitBlock->getTerminator());
NodeBuilder.addParameters(S.getContext());
NodeBuilder.preloadInvariantLoads();
NodeBuilder.addParameters(S.getContext());
Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder());
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
Builder.SetInsertPoint(StartBlock->begin());

View File

@ -834,11 +834,8 @@ void IslNodeBuilder::materializeParameters(isl_set *Set, bool All) {
}
}
/// @brief Create the actual preload memory access for @p MA.
static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA,
isl_ast_build *Build,
IslExprBuilder &ExprBuilder) {
isl_set *AccessRange = isl_map_range(MA.getAccessRelation());
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
isl_ast_build *Build) {
isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext());
isl_ast_expr *Access =
@ -850,15 +847,19 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
isl_set *Domain,
isl_ast_build *Build) {
isl_set *AccessRange = isl_map_range(MA.getAccessRelation());
materializeParameters(AccessRange, false);
isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
isl_set_free(Universe);
if (AlwaysExecuted) {
isl_set_free(Domain);
return createPreloadLoad(S, MA, Build, ExprBuilder);
return preloadUnconditionally(AccessRange, Build);
} else {
materializeParameters(Domain, false);
isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
Value *Cond = ExprBuilder.create(DomainCond);
@ -891,7 +892,7 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
Builder.SetInsertPoint(ExecBB->getTerminator());
Instruction *AccInst = MA.getAccessInstruction();
Type *AccInstTy = AccInst->getType();
Value *PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder);
Value *PreAccInst = preloadUnconditionally(AccessRange, Build);
Builder.SetInsertPoint(MergeBB->getTerminator());
auto *MergePHI = Builder.CreatePHI(
@ -994,5 +995,5 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
Instruction *InsertLocation = --(Builder.GetInsertBlock()->end());
return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
InsertLocation);
InsertLocation, &ValueMap);
}

View File

@ -8,6 +8,7 @@
#include <vector>
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-scev-validator"
@ -125,10 +126,12 @@ private:
const Region *R;
ScalarEvolution &SE;
const Value *BaseAddress;
InvariantLoadsSetTy *ILS;
public:
SCEVValidator(const Region *R, ScalarEvolution &SE, const Value *BaseAddress)
: R(R), SE(SE), BaseAddress(BaseAddress) {}
SCEVValidator(const Region *R, ScalarEvolution &SE, const Value *BaseAddress,
InvariantLoadsSetTy *ILS)
: R(R), SE(SE), BaseAddress(BaseAddress), ILS(ILS) {}
class ValidatorResult visitConstant(const SCEVConstant *Constant) {
return ValidatorResult(SCEVType::INT);
@ -335,6 +338,15 @@ public:
return ValidatorResult(SCEVType::PARAM, S);
}
// Handle a SCEVUnknown wrapping a load instruction. If the load lives inside
// the region and the caller collects invariant loads (ILS is non-null), we
// optimistically treat the loaded value as a parameter and record the load;
// the caller is responsible for verifying the load can be kept invariant.
ValidatorResult visitLoadInstruction(Instruction *I, const SCEV *S) {
if (R->contains(I) && ILS) {
ILS->insert(cast<LoadInst>(I));
return ValidatorResult(SCEVType::PARAM, S);
}
// Otherwise fall back to the generic treatment of unknown instructions.
return visitGenericInst(I, S);
}
ValidatorResult visitSDivInstruction(Instruction *SDiv, const SCEV *S) {
assert(SDiv->getOpcode() == Instruction::SDiv &&
"Assumed SDiv instruction!");
@ -391,6 +403,8 @@ public:
if (Instruction *I = dyn_cast<Instruction>(Expr->getValue())) {
switch (I->getOpcode()) {
case Instruction::Load:
return visitLoadInstruction(I, Expr);
case Instruction::SDiv:
return visitSDivInstruction(I, Expr);
case Instruction::SRem:
@ -550,11 +564,11 @@ bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) {
}
bool isAffineExpr(const Region *R, const SCEV *Expr, ScalarEvolution &SE,
const Value *BaseAddress) {
const Value *BaseAddress, InvariantLoadsSetTy *ILS) {
if (isa<SCEVCouldNotCompute>(Expr))
return false;
SCEVValidator Validator(R, SE, BaseAddress);
SCEVValidator Validator(R, SE, BaseAddress, ILS);
DEBUG({
dbgs() << "\n";
dbgs() << "Expr: " << *Expr << "\n";
@ -580,7 +594,8 @@ std::vector<const SCEV *> getParamsInAffineExpr(const Region *R,
if (isa<SCEVCouldNotCompute>(Expr))
return std::vector<const SCEV *>();
SCEVValidator Validator(R, SE, BaseAddress);
InvariantLoadsSetTy ILS;
SCEVValidator Validator(R, SE, BaseAddress, &ILS);
ValidatorResult Result = Validator.visit(Expr);
assert(Result.isValid() && "Requested parameters for an invalid SCEV!");

View File

@ -14,7 +14,6 @@
#include "polly/Support/ScopHelper.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@ -240,8 +239,6 @@ void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) {
struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> {
friend struct SCEVVisitor<ScopExpander, const SCEV *>;
typedef llvm::DenseMap<const llvm::Value *, llvm::Value *> ValueMapT;
explicit ScopExpander(const Region &R, ScalarEvolution &SE,
const DataLayout &DL, const char *Name, ValueMapT *VMap)
: Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R),
@ -342,10 +339,9 @@ private:
///}
};
Value *
polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
const char *Name, const SCEV *E, Type *Ty, Instruction *IP,
llvm::DenseMap<const llvm::Value *, llvm::Value *> *VMap) {
// Expand the SCEV expression @p E into an llvm::Value of type @p Ty inserted
// before instruction @p IP, using a ScopExpander over the SCoP's region.
// @p VMap, if non-null, remaps values during expansion (used so references to
// hoisted/invariant loads resolve to their preloaded counterparts).
Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
const char *Name, const SCEV *E, Type *Ty,
Instruction *IP, ValueMapT *VMap) {
ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap);
return Expander.expandCodeFor(E, Ty, IP);
}
@ -383,3 +379,16 @@ Value *polly::getConditionFromTerminator(TerminatorInst *TI) {
return nullptr;
}
/// Check whether @p LInst can be hoisted out of region @p R.
///
/// A load is hoistable iff the SCEV of its pointer operand is invariant in
/// every loop that both surrounds the load and is contained in @p R, i.e.,
/// the load is not loop-carried within the region.
bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
                            ScalarEvolution &SE) {
  Loop *InnermostLoop = LI.getLoopFor(LInst->getParent());
  const SCEV *PtrSCEV =
      SE.getSCEVAtScope(LInst->getPointerOperand(), InnermostLoop);

  // Walk from the innermost surrounding loop outwards, but only as long as we
  // stay inside the region; loops enclosing R do not matter for hoisting.
  for (Loop *CurLoop = InnermostLoop; CurLoop && R.contains(CurLoop);
       CurLoop = CurLoop->getParentLoop())
    if (!SE.isLoopInvariant(PtrSCEV, CurLoop))
      return false;

  return true;
}

View File

@ -0,0 +1,38 @@
; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -polly-process-unprofitable -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK-NEXT: %polly.access.BPLoc = getelementptr i32*, i32** %BPLoc, i64 0
; CHECK-NEXT: %polly.access.BPLoc.load = load i32*, i32** %polly.access.BPLoc
;
; CHECK-LABEL: polly.stmt.bb2:
; CHECK-NEXT: %p_tmp3 = getelementptr inbounds i32, i32* %polly.access.BPLoc.load, i64 %polly.indvar
;
; void f(int **BPLoc) {
; for (int i = 0; i < 1024; i++)
; (*BPLoc)[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop i = 0..1023 storing 0 to (*BPLoc)[i]. The load of *BPLoc in %bb2 reads
; the same address every iteration; the CHECK lines above verify it is hoisted
; into polly.preload.begin during code generation.
define void @f(i32** %BPLoc) {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5
bb2: ; preds = %bb1
; Invariant load of the base pointer that Polly preloads.
%tmp = load i32*, i32** %BPLoc, align 8
%tmp3 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv
store i32 0, i32* %tmp3, align 4
br label %bb4
bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,61 @@
; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -polly-process-unprofitable -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK-NEXT: %0 = sext i32 %N to i64
; CHECK-NEXT: %1 = icmp sge i64 %0, 514
; CHECK-NEXT: br label %polly.preload.cond
;
; CHECK-LABEL: polly.preload.cond:
; CHECK-NEXT: br i1 %1, label %polly.preload.exec, label %polly.preload.merge
;
; CHECK-LABEL: polly.preload.merge:
; CHECK-NEXT: %polly.preload.tmp6.merge = phi i32* [ %polly.access.BPLoc.load, %polly.preload.exec ], [ null, %polly.preload.cond ]
;
; CHECK-LABEL: polly.stmt.bb5:
; CHECK-NEXT: %p_tmp7 = getelementptr inbounds i32, i32* %polly.preload.tmp6.merge, i64 %polly.indvar6
;
; void f(int **BPLoc, int *A, int N) {
; for (int i = 0; i < N; i++)
; if (i > 512)
; (*BPLoc)[i] = 0;
; else
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop i = 0..N-1; only the i > 512 branch (%bb5) loads *BPLoc, so the hoisted
; load is executed conditionally — the CHECK lines verify the preload is
; guarded (polly.preload.cond) and merged with a null default.
define void @f(i32** %BPLoc, i32* %A, i32 %N) {
bb:
%tmp = sext i32 %N to i64
br label %bb1
bb1: ; preds = %bb11, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ]
%tmp2 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp2, label %bb3, label %bb12
bb3: ; preds = %bb1
%tmp4 = icmp sgt i64 %indvars.iv, 512
br i1 %tmp4, label %bb5, label %bb8
bb5: ; preds = %bb3
; Conditionally executed invariant load of the base pointer.
%tmp6 = load i32*, i32** %BPLoc, align 8
%tmp7 = getelementptr inbounds i32, i32* %tmp6, i64 %indvars.iv
store i32 0, i32* %tmp7, align 4
br label %bb10
bb8: ; preds = %bb3
%tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp9, align 4
br label %bb10
bb10: ; preds = %bb8, %bb5
br label %bb11
bb11: ; preds = %bb10
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb12: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,54 @@
; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK-NEXT: %polly.access.C = getelementptr i32, i32* %C, i64 0
; CHECK-NEXT: %polly.access.C.load = load i32, i32* %polly.access.C
; CHECK-NOT: %polly.access.C.load = load i32, i32* %polly.access.C
;
; CHECK: polly.cond
; CHECK: %[[R0:[0-9]*]] = sext i32 %polly.access.C.load to i64
; CHECK: %[[R1:[0-9]*]] = icmp sle i64 %[[R0]], -1
;
; CHECK: polly.cond
; CHECK: %[[R2:[0-9]*]] = sext i32 %polly.access.C.load to i64
; CHECK: %[[R3:[0-9]*]] = icmp sge i64 %[[R2]], 1
;
; CHECK-NOT: polly.stmt.bb2
;
; void f(int *A, int *C) {
; for (int i = 0; i < 1024; i++)
; if (*C)
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop i = 0..1023 guarded by the value of *C. The load of *C in %bb2 is
; invariant; the CHECK lines verify it is preloaded exactly once and reused in
; both generated run-time conditions.
define void @f(i32* %A, i32* %C) {
bb:
br label %bb1
bb1: ; preds = %bb7, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb8
bb2: ; preds = %bb1
; Invariant load of the condition value *C.
%tmp = load i32, i32* %C, align 4
%tmp3 = icmp eq i32 %tmp, 0
br i1 %tmp3, label %bb6, label %bb4
bb4: ; preds = %bb2
%tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp5, align 4
br label %bb6
bb6: ; preds = %bb2, %bb4
br label %bb7
bb7: ; preds = %bb6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb8: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,69 @@
; RUN: opt %loadPolly -polly-codegen -polly-process-unprofitable -S < %s | FileCheck %s
;
; void fence(void);
;
; void f(int *A, int *B) {
; int i = 0;
; int x = 0;
;
; do {
; x = *B;
; S: A[i] += x;
; } while (i++ < 100);
;
; fence();
;
; do {
; P: A[i]++;
; } while (i++ < x / 2);
; }
;
; CHECK: polly.start:
; CHECK-NEXT: sext i32 %tmp.merge to i64
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Two do-while loops separated by a call to @fence (which splits the SCoPs).
; The load of *B (%tmp) in the first loop is invariant there and is used again
; in the second loop's exit condition (%div = %tmp / 2).
define void @f(i32* %A, i32* %B) {
entry:
br label %stmt.S
stmt.S: ; preds = %do.cond, %entry
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %do.cond ], [ 0, %entry ]
; Invariant load of *B, also feeding the second loop's bound.
%tmp = load i32, i32* %B, align 4
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv2
%tmp4 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %tmp4, %tmp
store i32 %add, i32* %arrayidx, align 4
br label %do.cond
do.cond: ; preds = %do.body
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
%exitcond = icmp ne i64 %indvars.iv.next3, 101
br i1 %exitcond, label %stmt.S, label %do.end
do.end: ; preds = %do.cond
%tmp5 = trunc i64 101 to i32
call void @fence() #2
%tmp6 = sext i32 %tmp5 to i64
br label %stmt.P
stmt.P: ; preds = %do.cond.5, %do.end
%indvars.iv = phi i64 [ %indvars.iv.next, %do.cond.5 ], [ %tmp6, %do.end ]
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp7 = load i32, i32* %arrayidx3, align 4
%inc4 = add nsw i32 %tmp7, 1
store i32 %inc4, i32* %arrayidx3, align 4
br label %do.cond.5
do.cond.5: ; preds = %do.body.1
%div = sdiv i32 %tmp, 2
%tmp8 = sext i32 %div to i64
%cmp7 = icmp slt i64 %indvars.iv, %tmp8
%indvars.iv.next = add i64 %indvars.iv, 1
br i1 %cmp7, label %stmt.P, label %do.end.8
do.end.8: ; preds = %do.cond.5
ret void
}
declare void @fence()

View File

@ -0,0 +1,34 @@
; RUN: opt %loadPolly -polly-codegen -polly-process-unprofitable -S < %s | FileCheck %s
;
; CHECK: polly.start
;
; void f(int *A, int *UB) {
; for (int i = 0; i < *UB; i++)
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Loop whose upper bound is the loaded value *UB; the load in the header %bb1
; is invariant and allows the trip count to be expressed as a parameter.
define void @f(i32* %A, i32* %UB) {
bb:
br label %bb1
bb1: ; preds = %bb6, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb6 ], [ 0, %bb ]
; Invariant load of the loop bound *UB.
%tmp = load i32, i32* %UB, align 4
%tmp2 = sext i32 %tmp to i64
%tmp3 = icmp slt i64 %indvars.iv, %tmp2
br i1 %tmp3, label %bb4, label %bb7
bb4: ; preds = %bb1
%tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp5, align 4
br label %bb6
bb6: ; preds = %bb4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb7: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,75 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
;
; This caused the code generation to emit a broken module as there are two
; dependences that need to be considered, thus code has to be emitted in a
; certain order:
; 1) To preload A[N * M] the expression N * M [p0] is needed (both for the
; condition under which A[N * M] is executed as well as to compute the
; index).
; 2) To generate (A[N * M] / 2) [p1] the preloaded value is needed.
;
; SCOP: p0: (%N * %M)
; SCOP: p1: (zext i32 (%tmp4 /u 2) to i64)
;
; CHECK: polly.preload.merge:
; CHECK: %polly.preload.tmp4.merge = phi i32 [ %polly.access.A.load, %polly.preload.exec ], [ 0, %polly.preload.cond ]
; CHECK: %3 = lshr i32 %polly.preload.tmp4.merge, 1
; CHECK: %4 = zext i32 %3 to i64
;
; void f(int *restrict A, int *restrict B, int N, int M) {
;
; for (int i = 0; i < N * M; i++)
; for (int j = 0; j < A[N * M] / 2; j++)
; B[i + j]++;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Nested loops where the inner bound is A[N * M] / 2: the parameter N * M is
; needed both to guard and to index the preload of A[N * M], whose value in
; turn becomes a second parameter — the parameter/hoisted-load dependence
; chain this test pins down.
define void @f(i32* noalias %A, i32* noalias %B, i32 %N, i32 %M) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc.8, %entry
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc.8 ], [ 0, %entry ]
%mul = mul nsw i32 %N, %M
%tmp = sext i32 %mul to i64
%cmp = icmp slt i64 %indvars.iv2, %tmp
br i1 %cmp, label %for.body, label %for.end.10
for.body: ; preds = %for.cond
br label %for.cond.1
for.cond.1: ; preds = %for.inc, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ]
%mul2 = mul nsw i32 %N, %M
%idxprom = sext i32 %mul2 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
; Invariant load A[N * M] used in the inner loop bound.
%tmp4 = load i32, i32* %arrayidx, align 4
%div = udiv i32 %tmp4, 2
%tmp5 = sext i32 %div to i64
%cmp3 = icmp slt i64 %indvars.iv, %tmp5
br i1 %cmp3, label %for.body.4, label %for.end
for.body.4: ; preds = %for.cond.1
%tmp6 = add nsw i64 %indvars.iv2, %indvars.iv
%arrayidx6 = getelementptr inbounds i32, i32* %B, i64 %tmp6
%tmp7 = load i32, i32* %arrayidx6, align 4
%inc = add nsw i32 %tmp7, 1
store i32 %inc, i32* %arrayidx6, align 4
br label %for.inc
for.inc: ; preds = %for.body.4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond.1
for.end: ; preds = %for.cond.1
br label %for.inc.8
for.inc.8: ; preds = %for.end
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
br label %for.cond
for.end.10: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,44 @@
; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen -polly-ignore-aliasing -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK: %polly.access.A = getelementptr i32**, i32*** %A, i64 42
; CHECK: %polly.access.A.load = load i32**, i32*** %polly.access.A
; CHECK: %polly.access.polly.access.A.load = getelementptr i32*, i32** %polly.access.A.load, i64 32
; CHECK: %polly.access.polly.access.A.load.load = load i32*, i32** %polly.access.polly.access.A.load
;
; CHECK: polly.stmt.bb2:
; CHECK: %p_tmp6 = getelementptr inbounds i32, i32* %polly.access.polly.access.A.load.load, i64 %polly.indvar
; CHECK: store i32 0, i32* %p_tmp6, align 4
;
; void f(int ***A) {
; for (int i = 0; i < 1024; i++)
; A[42][32][i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Two chained invariant loads through a pointer-to-pointer-to-pointer:
; A[42] is loaded, then A[42][32] is loaded from the first result. The CHECK
; lines verify both are preloaded in dependence order.
define void @f(i32*** %A) {
bb:
br label %bb1
bb1: ; preds = %bb7, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb8
bb2: ; preds = %bb1
%tmp = getelementptr inbounds i32**, i32*** %A, i64 42
; First invariant load: A[42].
%tmp3 = load i32**, i32*** %tmp, align 8
%tmp4 = getelementptr inbounds i32*, i32** %tmp3, i64 32
; Second invariant load, based on the first: A[42][32].
%tmp5 = load i32*, i32** %tmp4, align 8
%tmp6 = getelementptr inbounds i32, i32* %tmp5, i64 %indvars.iv
store i32 0, i32* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb8: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,44 @@
; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -polly-process-unprofitable -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0
; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B
;
; CHECK-LABEL: polly.stmt.bb2.split:
; CHECK: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar
; CHECK: store i32 %polly.access.B.load, i32* %scevgep, align 4
;
; void f(int *restrict A, int *restrict B) {
; for (int i = 0; i < 1024; i++)
; auto tmp = *B;
; // Split BB
; A[i] = tmp;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; The invariant load of *B happens in %bb2 while its use is in the split block
; %bb2.split; checks that preloading replaces the cross-block scalar use.
define void @f(i32* noalias %A, i32* noalias %B) {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5
bb2: ; preds = %bb1
; Invariant load of *B, used only in the successor block.
%tmp = load i32, i32* %B, align 4
br label %bb2.split
bb2.split:
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp, i32* %tmp3, align 4
br label %bb4
bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -88,5 +88,15 @@ if.end: ; preds = %if.then, %for.end
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (int c0 = 0; c0 <= 1018; c0 += 1)
; CHECK: Stmt_for_body(c0);
; Negative test. At the moment we will optimistically assume RED[0] in the conditional after the
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
; during SCoP generation we will realize that RED[0] is in fact not invariant and bail.
;
; Possible solutions could be:
; - Do not optimistically assume it to be invariant (as before this commit); however, we would lose
; a lot of invariant cases due to possible aliasing.
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
; rejecting the whole region.
;
; CHECK-NOT: for (int c0 = 0; c0 <= 1018; c0 += 1)
; CHECK-NOT: Stmt_for_body(c0);

View File

@ -29,7 +29,7 @@ return:
}
; CHECK-LABEL: base_pointer_in_condition
; CHECK: Valid Region for Scop: for.i => then
; CHECK: Valid Region for Scop: pre => return
define void @base_pointer_is_argument(float* %A, i64 %n) {
entry:
@ -292,4 +292,4 @@ exit:
}
; CHECK: base_pointer_is_ptr2ptr
; CHECK-NOT: Valid Region for Scop
; CHECK: Valid Region for Scop: for.j => for.i.inc

View File

@ -15,9 +15,9 @@
; PROFIT-NOT: Valid
;
; void f(int * restrict A, int * restrict C) {
; int j;
; int j = 0;
; for (int i = 0; i < 1024; i++) {
; while ((j = C[i]))
; while ((j = C[j]))
; A[j]++;
; }
; }
@ -37,7 +37,8 @@ bb2: ; preds = %bb1
br label %bb3
bb3: ; preds = %bb6, %bb2
%tmp = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
%indvars.j = phi i32 [ %tmp4, %bb6 ], [ 0, %bb2 ]
%tmp = getelementptr inbounds i32, i32* %C, i32 %indvars.j
%tmp4 = load i32, i32* %tmp, align 4
%tmp5 = icmp eq i32 %tmp4, 0
br i1 %tmp5, label %bb11, label %bb6

View File

@ -14,7 +14,8 @@ bb:
loop:
%indvar = phi i64 [ %indvar.next, %loop ], [ 0, %bb ]
%tmp12 = load i64, i64* %tmp1
%gep = getelementptr inbounds i64, i64* %tmp1, i64 %indvar
%tmp12 = load i64, i64* %gep
%tmp13 = mul nsw i64 %tmp12, %tmp4
%ptr = getelementptr inbounds float, float* %B, i64 %tmp13
%val = load float, float* %ptr

View File

@ -13,7 +13,7 @@
; RUN: | FileCheck %s --check-prefix=ALLOWNONAFFINEALL
; void f(int A[], int n) {
; for (int i = 0; i < A[n]; i++)
; for (int i = 0; i < A[n+i]; i++)
; A[i] = 0;
; }
@ -62,7 +62,8 @@ for.body: ; preds = %for.body.lr.ph, %fo
%inc = trunc i64 %1 to i32, !dbg !21
store i32 0, i32* %arrayidx2, align 4, !dbg !24
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !18, metadata !DIExpression()), !dbg !20
%2 = load i32, i32* %arrayidx, align 4, !dbg !21
%arrayidx3 = getelementptr inbounds i32, i32* %arrayidx, i64 %indvar, !dbg !21
%2 = load i32, i32* %arrayidx3, align 4, !dbg !21
%cmp = icmp slt i32 %inc, %2, !dbg !21
%indvar.next = add i64 %indvar, 1, !dbg !21
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21

View File

@ -6,7 +6,7 @@
;
; void a(struct b *A) {
; for (int i=0; i<32; i++)
; A->b[i] = 0;
; A[i].b[i] = 0;
; }
; CHECK: remark: ReportVariantBasePtr01.c:6:8: The following errors keep this region from being a Scop.
@ -23,11 +23,11 @@ entry:
entry.split: ; preds = %entry
tail call void @llvm.dbg.value(metadata %struct.b* %A, i64 0, metadata !16, metadata !DIExpression()), !dbg !23
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !25
%b = getelementptr inbounds %struct.b, %struct.b* %A, i64 0, i32 0, !dbg !26
br label %for.body, !dbg !27
for.body: ; preds = %for.body, %entry.split
%indvar4 = phi i64 [ %indvar.next, %for.body ], [ 0, %entry.split ]
%b = getelementptr inbounds %struct.b, %struct.b* %A, i64 %indvar4, i32 0, !dbg !26
%0 = mul i64 %indvar4, 4, !dbg !26
%1 = add i64 %0, 3, !dbg !26
%2 = add i64 %0, 2, !dbg !26

View File

@ -0,0 +1,57 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, int *B, int *C) {
; for (int i = 0; i < 1000; i++)
; if (A[i] == *B)
; A[i] = *C;
; }
;
; Check that only the access to *B is hoisted but not the one to *C.
;
; CHECK: Invariant Accesses: {
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: { Stmt_for_body__TO__if_end[i0] -> MemRef_B[0] };
; CHECK: Execution Context: { : }
; CHECK: }
;
; CHECK: Statements {
; CHECK: Stmt_for_body__TO__if_end
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: { Stmt_for_body__TO__if_end[i0] -> MemRef_C[0] };
; CHECK: }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; *B is loaded unconditionally in the loop body and is hoisted; *C is loaded
; only under the A[i] == *B condition inside the non-affine subregion and must
; stay a statement-local read (see CHECK lines above).
define void @f(i32* %A, i32* %B, i32* %C) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvars.iv, 1000
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp = load i32, i32* %arrayidx, align 4
; Unconditional invariant load of *B.
%tmp1 = load i32, i32* %B, align 4
%cmp1 = icmp eq i32 %tmp, %tmp1
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
; Conditional load of *C; not hoisted.
%tmp2 = load i32, i32* %C, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp2, i32* %arrayidx3, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -39,8 +39,10 @@
; void f(int * restrict A, int * restrict C) {
; int j;
; for (int i = 0; i < 1024; i++) {
; while ((j = C[i]))
; while ((j = C[i++])) {
; A[j]++;
; if (true) break;
; }
; }
; }
;
@ -70,7 +72,7 @@ bb6: ; preds = %bb3
%tmp9 = load i32, i32* %tmp8, align 4
%tmp10 = add nsw i32 %tmp9, 1
store i32 %tmp10, i32* %tmp8, align 4
br label %bb3
br i1 true, label %bb11, label %bb3
bb11: ; preds = %bb3
br label %bb12

View File

@ -6,35 +6,19 @@
; RUN: -analyze < %s | FileCheck %s \
; RUN: --check-prefix=ALL
;
; INNERMOST: Function: f
; INNERMOST: Region: %bb9---%bb17
; INNERMOST: Max Loop Depth: 1
; INNERMOST: Context:
; INNERMOST: [N] -> { :
; INNERMOST-DAG: N >= -2147483648
; INNERMOST-DAG: and
; INNERMOST-DAG: N <= 2147483647
; INNERMOST }
; INNERMOST: Assumed Context:
; INNERMOST: [N] -> { : }
; INNERMOST: p0: %N
; INNERMOST: Alias Groups (0):
; INNERMOST: n/a
; INNERMOST: Statements {
; INNERMOST: Stmt_bb11
; INNERMOST: Domain :=
; INNERMOST: [N] -> { Stmt_bb11[i0] :
; INNERMOST-DAG: i0 >= 0
; INNERMOST-DAG: and
; INNERMOST-DAG: i0 <= -1 + N
; INNERMOST: }
; INNERMOST: Schedule :=
; INNERMOST: [N] -> { Stmt_bb11[i0] -> [i0] };
; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0]
; INNERMOST: [N] -> { Stmt_bb11[i0] -> MemRef_A[i0] };
; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
; INNERMOST: [N] -> { Stmt_bb11[i0] -> MemRef_A[i0] };
; INNERMOST: }
; Negative test for INNERMOST.
; At the moment we will optimistically assume A[i] in the conditional before the inner
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
; during SCoP generation we will realize that A[i] is in fact not invariant (in this region = the body
; of the outer loop) and bail.
;
; Possible solutions could be:
; - Do not optimistically assume it to be invariant (as before this commit); however, we would lose
; a lot of invariant cases due to possible aliasing.
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
; rejecting the whole region.
;
; INNERMOST-NOT: Function: f
;
; ALL: Function: f
; ALL: Region: %bb3---%bb19

View File

@ -10,35 +10,19 @@
; RUN: -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true \
; RUN: -analyze < %s | FileCheck %s --check-prefix=PROFIT
;
; INNERMOST: Function: f
; INNERMOST: Region: %bb9---%bb18
; INNERMOST: Max Loop Depth: 1
; INNERMOST: Context:
; INNERMOST: [p_0] -> { :
; INNERMOST-DAG: p_0 >= -2199023255552
; INNERMOST-DAG: and
; INNERMOST-DAG: p_0 <= 2199023254528
; INNERMOST: }
; INNERMOST: Assumed Context:
; INNERMOST: [p_0] -> { : }
; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3>
; INNERMOST: Alias Groups (0):
; INNERMOST: n/a
; INNERMOST: Statements {
; INNERMOST: Stmt_bb12
; INNERMOST: Domain :=
; INNERMOST: [p_0] -> { Stmt_bb12[i0] :
; INNERMOST-DAG: i0 >= 0
; INNERMOST-DAG: and
; INNERMOST-DAG: i0 <= -1 + p_0
; INNERMOST: }
; INNERMOST: Schedule :=
; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> [i0] };
; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0]
; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> MemRef_A[i0] };
; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> MemRef_A[i0] };
; INNERMOST: }
; Negative test for INNERMOST.
; At the moment we will optimistically assume A[i] in the conditional before the inner
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
; during SCoP generation we will realize that A[i] is in fact not invariant (in this region = the body
; of the outer loop) and bail.
;
; Possible solutions could be:
; - Do not optimistically assume it to be invariant (as before this commit); however, we would lose
; a lot of invariant cases due to possible aliasing.
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
; rejecting the whole region.
;
; INNERMOST-NOT: Function: f
;
; ALL: Function: f
; ALL: Region: %bb3---%bb20

View File

@ -0,0 +1,35 @@
; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-process-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_BPLoc[0] };
;
; void f(int **BPLoc) {
; for (int i = 0; i < 1024; i++)
; (*BPLoc)[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; ScopInfo variant of the base-pointer test: the load of *BPLoc in %bb2 must
; show up under "Invariant Accesses" (see CHECK lines above).
define void @f(i32** %BPLoc) {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5
bb2: ; preds = %bb1
; Invariant load of the base pointer.
%tmp = load i32*, i32** %BPLoc, align 8
%tmp3 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv
store i32 0, i32* %tmp3, align 4
br label %bb4
bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,51 @@
; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-process-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BPLoc[0] };
;
; void f(int **BPLoc, int *A, int N) {
; for (int i = 0; i < N; i++)
; if (i > 512)
; (*BPLoc)[i] = 0;
; else
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; ScopInfo variant of the conditional base-pointer test: *BPLoc is only loaded
; on the i > 512 path (%bb5) and must be reported as an invariant access.
define void @f(i32** %BPLoc, i32* %A, i32 %N) {
bb:
%tmp = sext i32 %N to i64
br label %bb1
bb1: ; preds = %bb11, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ]
%tmp2 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp2, label %bb3, label %bb12
bb3: ; preds = %bb1
%tmp4 = icmp sgt i64 %indvars.iv, 512
br i1 %tmp4, label %bb5, label %bb8
bb5: ; preds = %bb3
; Conditionally executed invariant load of the base pointer.
%tmp6 = load i32*, i32** %BPLoc, align 8
%tmp7 = getelementptr inbounds i32, i32* %tmp6, i64 %indvars.iv
store i32 0, i32* %tmp7, align 4
br label %bb10
bb8: ; preds = %bb3
%tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp9, align 4
br label %bb10
bb10: ; preds = %bb8, %bb5
br label %bb11
bb11: ; preds = %bb10
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb12: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,43 @@
; RUN: opt %loadPolly -polly-process-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_C[0] };
;
; void f(int *A, int *C) {
; for (int i = 0; i < 1024; i++)
; if (*C)
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; ScopInfo variant of the condition test: the load of *C in %bb2 must be
; listed under "Invariant Accesses".
define void @f(i32* %A, i32* %C) {
bb:
br label %bb1
bb1: ; preds = %bb7, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb8
bb2: ; preds = %bb1
; Invariant load of the condition value *C.
%tmp = load i32, i32* %C, align 4
%tmp3 = icmp eq i32 %tmp, 0
br i1 %tmp3, label %bb6, label %bb4
bb4: ; preds = %bb2
%tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp5, align 4
br label %bb6
bb6: ; preds = %bb2, %bb4
br label %bb7
bb7: ; preds = %bb6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb8: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,36 @@
; RUN: opt %loadPolly -polly-scops -polly-process-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb1[i0] -> MemRef_UB[0] };
;
; void f(int *A, int *UB) {
; for (int i = 0; i < *UB; i++)
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; ScopInfo variant of the loaded-upper-bound test: the load of *UB in the loop
; header must be listed under "Invariant Accesses".
define void @f(i32* %A, i32* %UB) {
bb:
br label %bb1
bb1: ; preds = %bb6, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb6 ], [ 0, %bb ]
; Invariant load of the loop bound *UB.
%tmp = load i32, i32* %UB, align 4
%tmp2 = sext i32 %tmp to i64
%tmp3 = icmp slt i64 %indvars.iv, %tmp2
br i1 %tmp3, label %bb4, label %bb7
bb4: ; preds = %bb1
%tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp5, align 4
br label %bb6
bb6: ; preds = %bb4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb7: ; preds = %bb1
ret void
}

View File

@ -1,6 +1,17 @@
; RUN: opt %loadPolly -tbaa -polly-scops -polly-ignore-aliasing \
; RUN: -analyze < %s | FileCheck %s
;
; Note: The order of the invariant accesses is important because A is the
; base pointer of tmp3 and we will generate code in the same order as
; the invariant accesses are listed here.
;
; CHECK: Invariant Accesses: {
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_A[42]
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_tmp3[32]
; CHECK: }
;
; CHECK: Arrays {
; CHECK: i32** MemRef_A[*][8]
; CHECK: i32* MemRef_tmp3[*][8] [BasePtrOrigin: MemRef_A]
@ -31,11 +42,11 @@ bb1: ; preds = %bb7, %bb
bb2: ; preds = %bb1
%tmp = getelementptr inbounds i32**, i32*** %A, i64 42
%tmp3 = load i32**, i32*** %tmp, align 8, !tbaa !1
%tmp3 = load i32**, i32*** %tmp, align 8
%tmp4 = getelementptr inbounds i32*, i32** %tmp3, i64 32
%tmp5 = load i32*, i32** %tmp4, align 8, !tbaa !1
%tmp5 = load i32*, i32** %tmp4, align 8
%tmp6 = getelementptr inbounds i32, i32* %tmp5, i64 %indvars.iv
store i32 0, i32* %tmp6, align 4, !tbaa !5
store i32 0, i32* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb2
@ -45,11 +56,3 @@ bb7: ; preds = %bb2
bb8: ; preds = %bb1
ret void
}
!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git 9e282ff441e7a367dc711e41fd19d27ffc0f78d6)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"any pointer", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"int", !3, i64 0}

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-process-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] };
; CHECK-NOT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NOT: { Stmt_bb2[i0] -> MemRef_tmp[] };
;
; void f(int *restrict A, int *restrict B) {
; for (int i = 0; i < 1024; i++)
; auto tmp = *B;
; // Split BB
; A[i] = tmp;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; ScopInfo variant of the split-block test: the load of *B in %bb2 is used in
; %bb2b; after hoisting no scalar write of %tmp may remain (see CHECK-NOTs).
define void @f(i32* noalias %A, i32* noalias %B) {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5
bb2: ; preds = %bb1
; Invariant load of *B, used only in the successor block.
%tmp = load i32, i32* %B, align 4
br label %bb2b
bb2b:
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp, i32* %tmp3, align 4
br label %bb4
bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,85 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : (tmp5 >= 1 + tmp and tmp5 >= 6) or tmp >= 6 }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : tmp <= 5 }
; CHECK-NEXT: }
;
; void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB,
; int *restrict LB) {
; for (int i = 0; i < 100; i++) {
; int j = /* invariant load */ *LB;
; do {
; if (j > 5)
; A[i] += /* invariant load */ *V;
; else
; A[i] += /* invariant load */ *U;
; } while (j++ < /* invariant load */ *UB);
; }
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Four invariant loads (*LB, *UB, *V, *U) with different execution contexts:
; *LB and *UB execute on every outer iteration, while *V and *U are only
; reached on the branches of the j > 5 test — the CHECK lines verify the
; per-load execution contexts computed by ScopInfo.
define void @f(i32* noalias %A, i32* noalias %V, i32* noalias %U, i32* noalias %UB, i32* noalias %LB) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Invariant load of the inner-loop start value *LB.
%tmp = load i32, i32* %LB, align 4
br label %do.body
do.body: ; preds = %do.cond, %for.body
%j.0 = phi i32 [ %tmp, %for.body ], [ %inc, %do.cond ]
%cmp1 = icmp sgt i32 %j.0, 5
br i1 %cmp1, label %if.then, label %if.else
if.then: ; preds = %do.body
; Invariant load of *V, executed only when j > 5.
%tmp1 = load i32, i32* %V, align 4
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp2 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %tmp2, %tmp1
store i32 %add, i32* %arrayidx, align 4
br label %if.end
if.else: ; preds = %do.body
; Invariant load of *U, executed only when j <= 5.
%tmp3 = load i32, i32* %U, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp4 = load i32, i32* %arrayidx3, align 4
%add4 = add nsw i32 %tmp4, %tmp3
store i32 %add4, i32* %arrayidx3, align 4
br label %if.end
if.end: ; preds = %if.else, %if.then
br label %do.cond
do.cond: ; preds = %if.end
%inc = add nsw i32 %j.0, 1
; Invariant load of the inner-loop bound *UB.
%tmp5 = load i32, i32* %UB, align 4
%cmp5 = icmp slt i32 %j.0, %tmp5
br i1 %cmp5, label %do.body, label %do.end
do.end: ; preds = %do.cond
br label %for.inc
for.inc: ; preds = %do.end
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,63 @@
; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s
;
; Negative test. If we assume UB[*V] to be invariant we get a cyclic
; dependence in the invariant loads that needs to be resolved by
; ignoring the actual accessed address and focusing on the fact
; that the access happened. However, at the moment we assume UB[*V]
; not to be loop invariant, thus reject this region.
;
; CHECK-NOT: Statements
;
;
; void f(int *restrict V, int *restrict UB, int *restrict A) {
; for (int i = 0; i < 100; i++) {
; int j = 0;
; int x = 0;
; do {
; x = /* invariant load dependent on UB[*V] */ *V;
; A[j + i]++;
; } while (j++ < /* invariant load dependent on *V */ UB[x]);
; }
; }
;
target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
; Negative test body: the inner do-while loads x = *V (%tmp) in do.body and
; then uses that loaded value to index UB[x] (%arrayidx3) in the latch, so the
; two potential invariant loads depend on each other through memory.
define void @f(i32* noalias %V, i32* noalias %UB, i32* noalias %A) {
entry:
br label %for.cond
; Outer loop header: i counts 0..99 in %indvars.iv2.
for.cond: ; preds = %for.inc, %entry
%indvars.iv2 = phi i32 [ %indvars.iv.next3, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i32 %indvars.iv2, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
br label %do.body
; Inner do-while body: loads *V into %tmp, then A[j + i]++.
do.body: ; preds = %do.cond, %for.body
%indvars.iv = phi i32 [ %indvars.iv.next, %do.cond ], [ 0, %for.body ]
%tmp = load i32, i32* %V, align 4
%tmp4 = add nuw nsw i32 %indvars.iv, %indvars.iv2
%arrayidx = getelementptr inbounds i32, i32* %A, i32 %tmp4
%tmp5 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %tmp5, 1
store i32 %inc, i32* %arrayidx, align 4
br label %do.cond
; Latch: UB[%tmp] -- the address depends on the load of *V above, which is
; what creates the dependence the test expects the SCoP detection to reject.
do.cond: ; preds = %do.body
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
%arrayidx3 = getelementptr inbounds i32, i32* %UB, i32 %tmp
%tmp6 = load i32, i32* %arrayidx3, align 4
%cmp4 = icmp slt i32 %indvars.iv, %tmp6
br i1 %cmp4, label %do.body, label %do.end
do.end: ; preds = %do.cond
br label %for.inc
; Outer loop latch: i++.
for.inc: ; preds = %do.end
%indvars.iv.next3 = add nuw nsw i32 %indvars.iv2, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,108 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[2]
; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[1]
; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[0]
; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 }
; CHECK-NEXT: }
;
; CHECK: p0: %tmp
; CHECK: p1: %tmp8
; CHECK: p2: %tmp10
; CHECK: Statements {
; CHECK: Stmt_for_body_6
; CHECK: Domain :=
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 };
; CHECK: Schedule :=
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
; CHECK: }
;
; int bounds[3];
; double data[1024][1024][1024];
;
; void foo() {
; int i, j, k;
; for (k = 0; k < bounds[2]; k++)
; for (j = 0; j < bounds[1]; j++)
; for (i = 0; i < bounds[0]; i++)
; data[k][j][i] += i + j + k;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@bounds = common global [3 x i32] zeroinitializer, align 4
@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
; Triple loop nest (k, j, i). Each loop's bound is loaded from the global
; @bounds array inside its own header (%tmp = bounds[2], %tmp8 = bounds[1],
; %tmp10 = bounds[0]); these are the loads the CHECK lines expect to be
; hoisted as invariant accesses with nested execution contexts.
define void @foo() {
entry:
br label %for.cond
; k-loop header: bound loaded from bounds[2] every iteration (%tmp).
for.cond: ; preds = %for.inc.16, %entry
%indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
%tmp = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 2), align 4
%tmp7 = sext i32 %tmp to i64
%cmp = icmp slt i64 %indvars.iv5, %tmp7
br i1 %cmp, label %for.body, label %for.end.18
for.body: ; preds = %for.cond
br label %for.cond.1
; j-loop header: bound loaded from bounds[1] (%tmp8); only executed if the
; k-loop entered, hence the "tmp >= 1" execution context in the CHECK lines.
for.cond.1: ; preds = %for.inc.13, %for.body
%indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
%tmp8 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 1), align 4
%tmp9 = sext i32 %tmp8 to i64
%cmp2 = icmp slt i64 %indvars.iv3, %tmp9
br i1 %cmp2, label %for.body.3, label %for.end.15
for.body.3: ; preds = %for.cond.1
br label %for.cond.4
; i-loop header: bound loaded from bounds[0] (%tmp10); guarded by both outer
; loops entering ("tmp8 >= 1 and tmp >= 1" in the CHECK lines).
for.cond.4: ; preds = %for.inc, %for.body.3
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
%tmp10 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 0), align 4
%tmp11 = sext i32 %tmp10 to i64
%cmp5 = icmp slt i64 %indvars.iv, %tmp11
br i1 %cmp5, label %for.body.6, label %for.end
; Innermost body: data[k][j][i] += (double)(i + j + k).
for.body.6: ; preds = %for.cond.4
%tmp12 = add nsw i64 %indvars.iv, %indvars.iv3
%tmp13 = add nsw i64 %tmp12, %indvars.iv5
%tmp14 = trunc i64 %tmp13 to i32
%conv = sitofp i32 %tmp14 to double
%arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv
%tmp15 = load double, double* %arrayidx11, align 8
%add12 = fadd double %tmp15, %conv
store double %add12, double* %arrayidx11, align 8
br label %for.inc
; i-loop latch.
for.inc: ; preds = %for.body.6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond.4
for.end: ; preds = %for.cond.4
br label %for.inc.13
; j-loop latch.
for.inc.13: ; preds = %for.end
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
br label %for.cond.1
for.end.15: ; preds = %for.cond.1
br label %for.inc.16
; k-loop latch.
for.inc.16: ; preds = %for.end.15
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
br label %for.cond
for.end.18: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,68 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[0]
; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[1]
; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : tmp >= 0 }
; CHECK: }
; double A[1000][1000];
; long bounds[2];
;
; void foo() {
;
; for (long i = 0; i <= bounds[0]; i++)
; for (long j = 0; j <= bounds[1]; j++)
; A[i][j] += i + j;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@bounds = common global [2 x i64] zeroinitializer, align 16
@A = common global [1000 x [1000 x double]] zeroinitializer, align 16
; Double loop nest (i, j). Each header re-loads its bound from @bounds
; (%tmp = bounds[0], %tmp1 = bounds[1]); the CHECK lines expect both loads
; to be reported as invariant accesses, the inner one guarded by the outer
; loop entering ("tmp >= 0").
define void @foo() {
entry:
br label %for.cond
; i-loop header: exits when i > bounds[0] (inclusive upper bound, i <= tmp).
for.cond: ; preds = %for.inc.6, %entry
%i.0 = phi i64 [ 0, %entry ], [ %inc7, %for.inc.6 ]
%tmp = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 0), align 16
%cmp = icmp sgt i64 %i.0, %tmp
br i1 %cmp, label %for.end.8, label %for.body
for.body: ; preds = %for.cond
br label %for.cond.1
; j-loop header: exits when j > bounds[1] (%tmp1).
for.cond.1: ; preds = %for.inc, %for.body
%j.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ]
%tmp1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 1), align 8
%cmp2 = icmp sgt i64 %j.0, %tmp1
br i1 %cmp2, label %for.end, label %for.body.3
; Inner body: A[i][j] += (double)(i + j).
for.body.3: ; preds = %for.cond.1
%add = add nsw i64 %i.0, %j.0
%conv = sitofp i64 %add to double
%arrayidx4 = getelementptr inbounds [1000 x [1000 x double]], [1000 x [1000 x double]]* @A, i64 0, i64 %i.0, i64 %j.0
%tmp2 = load double, double* %arrayidx4, align 8
%add5 = fadd double %tmp2, %conv
store double %add5, double* %arrayidx4, align 8
br label %for.inc
; j-loop latch.
for.inc: ; preds = %for.body.3
%inc = add nuw nsw i64 %j.0, 1
br label %for.cond.1
for.end: ; preds = %for.cond.1
br label %for.inc.6
; i-loop latch.
for.inc.6: ; preds = %for.end
%inc7 = add nuw nsw i64 %i.0, 1
br label %for.cond
for.end.8: ; preds = %for.cond
ret void
}