forked from OSchip/llvm-project
Allow invariant loads in the SCoP description
This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. 
If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
This commit is contained in:
parent
521dd5842f
commit
09e3697f44
|
@ -203,6 +203,10 @@ protected:
|
|||
virtual void createMark(__isl_take isl_ast_node *Marker);
|
||||
virtual void createFor(__isl_take isl_ast_node *For);
|
||||
|
||||
/// @brief Preload the memory access at @p AccessRange with @p Build.
|
||||
Value *preloadUnconditionally(__isl_take isl_set *AccessRange,
|
||||
isl_ast_build *Build);
|
||||
|
||||
/// @brief Preload the memory load access @p MA.
|
||||
///
|
||||
/// If @p MA is not always executed it will be conditionally loaded and
|
||||
|
|
|
@ -48,6 +48,7 @@
|
|||
#define POLLY_SCOP_DETECTION_H
|
||||
|
||||
#include "polly/ScopDetectionDiagnostic.h"
|
||||
#include "polly/Support/ScopHelper.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AliasSetTracker.h"
|
||||
|
@ -146,6 +147,9 @@ private:
|
|||
using BoxedLoopsMapTy = DenseMap<const Region *, BoxedLoopsSetTy>;
|
||||
BoxedLoopsMapTy BoxedLoopsMap;
|
||||
|
||||
/// @brief Map to remember loads that are required to be invariant.
|
||||
DenseMap<const Region *, InvariantLoadsSetTy> RequiredInvariantLoadsMap;
|
||||
|
||||
/// @brief Context variables for SCoP detection.
|
||||
struct DetectionContext {
|
||||
Region &CurRegion; // The region to check.
|
||||
|
@ -178,11 +182,15 @@ private:
|
|||
/// @brief The set of loops contained in non-affine regions.
|
||||
BoxedLoopsSetTy &BoxedLoopsSet;
|
||||
|
||||
/// @brief Loads that need to be invariant during execution.
|
||||
InvariantLoadsSetTy &RequiredILS;
|
||||
|
||||
DetectionContext(Region &R, AliasAnalysis &AA,
|
||||
NonAffineSubRegionSetTy &NASRS, BoxedLoopsSetTy &BLS,
|
||||
bool Verify)
|
||||
InvariantLoadsSetTy &RequiredILS, bool Verify)
|
||||
: CurRegion(R), AST(AA), Verifying(Verify), Log(&R), hasLoads(false),
|
||||
hasStores(false), NonAffineSubRegionSet(NASRS), BoxedLoopsSet(BLS) {}
|
||||
hasStores(false), NonAffineSubRegionSet(NASRS), BoxedLoopsSet(BLS),
|
||||
RequiredILS(RequiredILS) {}
|
||||
};
|
||||
|
||||
// Remember the valid regions
|
||||
|
@ -241,6 +249,18 @@ private:
|
|||
/// @return True if the call instruction is valid, false otherwise.
|
||||
static bool isValidCallInst(CallInst &CI);
|
||||
|
||||
/// @brief Check if the given loads could be invariant and can be hoisted.
|
||||
///
|
||||
/// If true is returned the loads are added to the required invariant loads
|
||||
/// contained in the @p Context.
|
||||
///
|
||||
/// @param RequiredILS The loads to check.
|
||||
/// @param Context The current detection context.
|
||||
///
|
||||
/// @return True if all loads can be assumed invariant.
|
||||
bool onlyValidRequiredInvariantLoads(InvariantLoadsSetTy &RequiredILS,
|
||||
DetectionContext &Context) const;
|
||||
|
||||
/// @brief Check if a value is invariant in the region Reg.
|
||||
///
|
||||
/// @param Val Value to check for invariance.
|
||||
|
@ -300,6 +320,18 @@ private:
|
|||
bool isValidBranch(BasicBlock &BB, BranchInst *BI, Value *Condition,
|
||||
bool IsLoopBranch, DetectionContext &Context) const;
|
||||
|
||||
/// @brief Check if the SCEV @p S is affine in the current @p Context.
|
||||
///
|
||||
/// This will also use a heuristic to decide if we want to require loads to be
|
||||
/// invariant to make the expression affine or if we want to treat is as
|
||||
/// non-affine.
|
||||
///
|
||||
/// @param S The expression to be checked.
|
||||
/// @param Context The context of scop detection.
|
||||
/// @param BaseAddress The base address of the expression @p S (if any).
|
||||
bool isAffine(const SCEV *S, DetectionContext &Context,
|
||||
Value *BaseAddress = nullptr) const;
|
||||
|
||||
/// @brief Check if the control flow in a basic block is valid.
|
||||
///
|
||||
/// @param BB The BB to check the control flow.
|
||||
|
@ -369,6 +401,9 @@ public:
|
|||
/// @brief Return the set of loops in non-affine subregions for @p R.
|
||||
const BoxedLoopsSetTy *getBoxedLoops(const Region *R) const;
|
||||
|
||||
/// @brief Return the set of required invariant loads for @p R.
|
||||
const InvariantLoadsSetTy *getRequiredInvariantLoads(const Region *R) const;
|
||||
|
||||
/// @brief Return true if @p SubR is a non-affine subregion in @p ScopR.
|
||||
bool isNonAffineSubRegion(const Region *SubR, const Region *ScopR) const;
|
||||
|
||||
|
|
|
@ -1193,7 +1193,23 @@ private:
|
|||
/// @see isIgnored()
|
||||
void simplifySCoP(bool RemoveIgnoredStmts);
|
||||
|
||||
/// @brief Hoist all invariant memory loads.
|
||||
/// @brief Hoist invariant memory loads and check for required ones.
|
||||
///
|
||||
/// We first identify "common" invariant loads, thus loads that are invariant
|
||||
/// and can be hoisted. Then we check if all required invariant loads have
|
||||
/// been identified as (common) invariant. A load is a required invariant load
|
||||
/// if it was assumed to be invariant during SCoP detection, e.g., to assume
|
||||
/// loop bounds to be affine or runtime alias checks to be placeable. In case
|
||||
/// a required invariant load was not identified as (common) invariant we will
|
||||
/// drop this SCoP. An example for both "common" as well as required invariant
|
||||
/// loads is given below:
|
||||
///
|
||||
/// for (int i = 1; i < *LB[0]; i++)
|
||||
/// for (int j = 1; j < *LB[1]; j++)
|
||||
/// A[i][j] += A[0][0] + (*V);
|
||||
///
|
||||
/// Common inv. loads: V, A[0][0], LB[0], LB[1]
|
||||
/// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
|
||||
void hoistInvariantLoads();
|
||||
|
||||
/// @brief Build the Context of the Scop.
|
||||
|
@ -1265,6 +1281,7 @@ public:
|
|||
//@}
|
||||
|
||||
ScalarEvolution *getSE() const;
|
||||
ScopDetection &getSD() const { return SD; }
|
||||
|
||||
/// @brief Get the count of parameters used in this Scop.
|
||||
///
|
||||
|
@ -1596,8 +1613,10 @@ class ScopInfo : public RegionPass {
|
|||
/// @param L The parent loop of the instruction
|
||||
/// @param R The region on which to build the data access dictionary.
|
||||
/// @param BoxedLoops The set of loops that are overapproximated in @p R.
|
||||
/// @param ScopRIL The required invariant loads equivalence classes.
|
||||
void buildMemoryAccess(Instruction *Inst, Loop *L, Region *R,
|
||||
const ScopDetection::BoxedLoopsSetTy *BoxedLoops);
|
||||
const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
|
||||
const InvariantLoadsSetTy &ScopRIL);
|
||||
|
||||
/// @brief Analyze and extract the cross-BB scalar dependences (or,
|
||||
/// dataflow dependencies) of an instruction.
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#ifndef POLLY_SCEV_VALIDATOR_H
|
||||
#define POLLY_SCEV_VALIDATOR_H
|
||||
|
||||
#include "polly/Support/ScopHelper.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include <vector>
|
||||
|
||||
|
@ -21,6 +22,7 @@ class SCEV;
|
|||
class ScalarEvolution;
|
||||
class Value;
|
||||
class Loop;
|
||||
class LoadInst;
|
||||
}
|
||||
|
||||
namespace polly {
|
||||
|
@ -45,8 +47,8 @@ void findValues(const llvm::SCEV *Expr, llvm::SetVector<llvm::Value *> &Values);
|
|||
/// @param R The region in which we look for dependences.
|
||||
bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R);
|
||||
bool isAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
|
||||
llvm::ScalarEvolution &SE,
|
||||
const llvm::Value *BaseAddress = 0);
|
||||
llvm::ScalarEvolution &SE, const llvm::Value *BaseAddress = 0,
|
||||
InvariantLoadsSetTy *ILS = nullptr);
|
||||
std::vector<const llvm::SCEV *>
|
||||
getParamsInAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression,
|
||||
llvm::ScalarEvolution &SE,
|
||||
|
|
|
@ -15,12 +15,13 @@
|
|||
#define POLLY_SUPPORT_IRHELPER_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
|
||||
namespace llvm {
|
||||
class Type;
|
||||
class Instruction;
|
||||
class LoadInst;
|
||||
class LoopInfo;
|
||||
class Loop;
|
||||
class ScalarEvolution;
|
||||
|
@ -45,6 +46,9 @@ class Scop;
|
|||
using ValueMapT = llvm::DenseMap<llvm::AssertingVH<llvm::Value>,
|
||||
llvm::AssertingVH<llvm::Value>>;
|
||||
|
||||
/// @brief Type for a set of invariant loads.
|
||||
using InvariantLoadsSetTy = llvm::SetVector<llvm::AssertingVH<llvm::LoadInst>>;
|
||||
|
||||
/// Temporary Hack for extended regiontree.
|
||||
///
|
||||
/// @brief Cast the region to loop.
|
||||
|
@ -105,11 +109,11 @@ void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock, llvm::Pass *P);
|
|||
/// @param E The expression for which code is actually generated.
|
||||
/// @param Ty The type of the resulting code.
|
||||
/// @param IP The insertion point for the new code.
|
||||
llvm::Value *expandCodeFor(
|
||||
Scop &S, llvm::ScalarEvolution &SE, const llvm::DataLayout &DL,
|
||||
const char *Name, const llvm::SCEV *E, llvm::Type *Ty,
|
||||
llvm::Instruction *IP,
|
||||
llvm::DenseMap<const llvm::Value *, llvm::Value *> *VMap = nullptr);
|
||||
/// @param VMap A remaping of values used in @p E.
|
||||
llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE,
|
||||
const llvm::DataLayout &DL, const char *Name,
|
||||
const llvm::SCEV *E, llvm::Type *Ty,
|
||||
llvm::Instruction *IP, ValueMapT *VMap = nullptr);
|
||||
|
||||
/// @brief Check if the block is a error block.
|
||||
///
|
||||
|
@ -133,5 +137,16 @@ bool isErrorBlock(llvm::BasicBlock &BB);
|
|||
///
|
||||
/// @return The condition of @p TI and nullptr if none could be extracted.
|
||||
llvm::Value *getConditionFromTerminator(llvm::TerminatorInst *TI);
|
||||
|
||||
/// @brief Check if @p LInst can be hoisted in @p R.
|
||||
///
|
||||
/// @param LInst The load to check.
|
||||
/// @param R The analyzed region.
|
||||
/// @param LI The loop info.
|
||||
/// @param SE The scalar evolution analysis.
|
||||
///
|
||||
/// @return True if @p LInst can be hoisted in @p R.
|
||||
bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI,
|
||||
llvm::ScalarEvolution &SE);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -51,7 +51,6 @@
|
|||
#include "polly/ScopDetection.h"
|
||||
#include "polly/ScopDetectionDiagnostic.h"
|
||||
#include "polly/Support/SCEVValidator.h"
|
||||
#include "polly/Support/ScopHelper.h"
|
||||
#include "polly/Support/ScopLocation.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
|
@ -258,9 +257,10 @@ bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
|
|||
if (Verify) {
|
||||
BoxedLoopsSetTy DummyBoxedLoopsSet;
|
||||
NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
|
||||
InvariantLoadsSetTy DummyILS;
|
||||
DetectionContext Context(const_cast<Region &>(R), *AA,
|
||||
DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
|
||||
false /*verifying*/);
|
||||
DummyILS, false /*verifying*/);
|
||||
return isValidRegion(Context);
|
||||
}
|
||||
|
||||
|
@ -302,15 +302,39 @@ bool ScopDetection::addOverApproximatedRegion(Region *AR,
|
|||
return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
|
||||
}
|
||||
|
||||
bool ScopDetection::onlyValidRequiredInvariantLoads(
|
||||
InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
|
||||
Region &CurRegion = Context.CurRegion;
|
||||
|
||||
for (LoadInst *Load : RequiredILS)
|
||||
if (!isHoistableLoad(Load, CurRegion, *LI, *SE))
|
||||
return false;
|
||||
|
||||
Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ScopDetection::isAffine(const SCEV *S, DetectionContext &Context,
|
||||
Value *BaseAddress) const {
|
||||
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseAddress, &AccessILS))
|
||||
return false;
|
||||
|
||||
if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
|
||||
Value *Condition, bool IsLoopBranch,
|
||||
DetectionContext &Context) const {
|
||||
Region &CurRegion = Context.CurRegion;
|
||||
|
||||
Loop *L = LI->getLoopFor(&BB);
|
||||
const SCEV *ConditionSCEV = SE->getSCEVAtScope(Condition, L);
|
||||
|
||||
if (isAffineExpr(&CurRegion, ConditionSCEV, *SE))
|
||||
if (isAffine(ConditionSCEV, Context))
|
||||
return true;
|
||||
|
||||
if (!IsLoopBranch && AllowNonAffineSubRegions &&
|
||||
|
@ -327,8 +351,6 @@ bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
|
|||
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
|
||||
Value *Condition, bool IsLoopBranch,
|
||||
DetectionContext &Context) const {
|
||||
Region &CurRegion = Context.CurRegion;
|
||||
|
||||
// Non constant conditions of branches need to be ICmpInst.
|
||||
if (!isa<ICmpInst>(Condition)) {
|
||||
if (!IsLoopBranch && AllowNonAffineSubRegions &&
|
||||
|
@ -361,7 +383,7 @@ bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
|
|||
const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), L);
|
||||
const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L);
|
||||
|
||||
if (isAffineExpr(&CurRegion, LHS, *SE) && isAffineExpr(&CurRegion, RHS, *SE))
|
||||
if (isAffine(LHS, Context) && isAffine(RHS, Context))
|
||||
return true;
|
||||
|
||||
if (!IsLoopBranch && AllowNonAffineSubRegions &&
|
||||
|
@ -452,18 +474,6 @@ bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
|
|||
if (!isInvariant(*Operand, Reg))
|
||||
return false;
|
||||
|
||||
// When the instruction is a load instruction, check that no write to memory
|
||||
// in the region aliases with the load.
|
||||
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
|
||||
auto Loc = MemoryLocation::get(LI);
|
||||
|
||||
// Check if any basic block in the region can modify the location pointed to
|
||||
// by 'Loc'. If so, 'Val' is (likely) not invariant in the region.
|
||||
for (const BasicBlock *BB : Reg.blocks())
|
||||
if (AA->canBasicBlockModify(*BB, Loc))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -547,7 +557,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
|
|||
const Instruction *Insn = Pair.first;
|
||||
const SCEV *AF = Pair.second;
|
||||
|
||||
if (!isAffineExpr(&CurRegion, AF, *SE, BaseValue)) {
|
||||
if (!isAffine(AF, Context, BaseValue)) {
|
||||
invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
|
||||
BaseValue);
|
||||
if (!KeepGoing)
|
||||
|
@ -574,7 +584,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
|
|||
MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
|
||||
|
||||
if (!AF) {
|
||||
if (isAffineExpr(&CurRegion, Pair.second, *SE, BaseValue))
|
||||
if (isAffine(Pair.second, Context, BaseValue))
|
||||
Acc->DelinearizedSubscripts.push_back(Pair.second);
|
||||
else
|
||||
IsNonAffine = true;
|
||||
|
@ -584,7 +594,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
|
|||
if (Acc->DelinearizedSubscripts.size() == 0)
|
||||
IsNonAffine = true;
|
||||
for (const SCEV *S : Acc->DelinearizedSubscripts)
|
||||
if (!isAffineExpr(&CurRegion, S, *SE, BaseValue))
|
||||
if (!isAffine(S, Context, BaseValue))
|
||||
IsNonAffine = true;
|
||||
}
|
||||
|
||||
|
@ -655,11 +665,11 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
|
|||
if (PollyDelinearize && !isVariantInNonAffineLoop) {
|
||||
Context.Accesses[BasePointer].push_back({&Inst, AccessFunction});
|
||||
|
||||
if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
|
||||
if (!isAffine(AccessFunction, Context, BaseValue))
|
||||
Context.NonAffineAccesses.insert(BasePointer);
|
||||
} else if (!AllowNonAffine) {
|
||||
if (isVariantInNonAffineLoop ||
|
||||
!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue))
|
||||
!isAffine(AccessFunction, Context, BaseValue))
|
||||
return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true,
|
||||
AccessFunction, &Inst, BaseValue);
|
||||
}
|
||||
|
@ -693,9 +703,16 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
|
|||
// the beginning of the SCoP. This breaks if the base pointer is defined
|
||||
// inside the scop. Hence, we can only create a run-time check if we are
|
||||
// sure the base pointer is not an instruction defined inside the scop.
|
||||
// However, we can ignore loads that will be hoisted.
|
||||
for (const auto &Ptr : AS) {
|
||||
Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
|
||||
if (Inst && CurRegion.contains(Inst)) {
|
||||
auto *Load = dyn_cast<LoadInst>(Inst);
|
||||
if (Load && isHoistableLoad(Load, CurRegion, *LI, *SE)) {
|
||||
Context.RequiredILS.insert(Load);
|
||||
continue;
|
||||
}
|
||||
|
||||
CanBuildRunTimeCheck = false;
|
||||
break;
|
||||
}
|
||||
|
@ -815,7 +832,8 @@ Region *ScopDetection::expandRegion(Region &R) {
|
|||
while (ExpandedRegion) {
|
||||
DetectionContext Context(
|
||||
*ExpandedRegion, *AA, NonAffineSubRegionMap[ExpandedRegion.get()],
|
||||
BoxedLoopsMap[ExpandedRegion.get()], false /* verifying */);
|
||||
BoxedLoopsMap[ExpandedRegion.get()],
|
||||
RequiredInvariantLoadsMap[ExpandedRegion.get()], false /* verifying */);
|
||||
DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
|
||||
// Only expand when we did not collect errors.
|
||||
|
||||
|
@ -877,11 +895,12 @@ void ScopDetection::removeCachedResults(const Region &R) {
|
|||
ValidRegions.remove(&R);
|
||||
BoxedLoopsMap.erase(&R);
|
||||
NonAffineSubRegionMap.erase(&R);
|
||||
RequiredInvariantLoadsMap.erase(&R);
|
||||
}
|
||||
|
||||
void ScopDetection::findScops(Region &R) {
|
||||
DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], BoxedLoopsMap[&R],
|
||||
false /*verifying*/);
|
||||
RequiredInvariantLoadsMap[&R], false /*verifying*/);
|
||||
|
||||
bool RegionIsValid = false;
|
||||
if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) {
|
||||
|
@ -1121,14 +1140,23 @@ ScopDetection::getBoxedLoops(const Region *R) const {
|
|||
return &BLMIt->second;
|
||||
}
|
||||
|
||||
const InvariantLoadsSetTy *
|
||||
ScopDetection::getRequiredInvariantLoads(const Region *R) const {
|
||||
auto I = RequiredInvariantLoadsMap.find(R);
|
||||
if (I == RequiredInvariantLoadsMap.end())
|
||||
return nullptr;
|
||||
return &I->second;
|
||||
}
|
||||
|
||||
void polly::ScopDetection::verifyRegion(const Region &R) const {
|
||||
assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
|
||||
|
||||
BoxedLoopsSetTy DummyBoxedLoopsSet;
|
||||
NonAffineSubRegionSetTy DummyNonAffineSubRegionSet;
|
||||
InvariantLoadsSetTy DummyILS;
|
||||
DetectionContext Context(const_cast<Region &>(R), *AA,
|
||||
DummyNonAffineSubRegionSet, DummyBoxedLoopsSet,
|
||||
true /*verifying*/);
|
||||
DummyILS, true /*verifying*/);
|
||||
isValidRegion(Context);
|
||||
}
|
||||
|
||||
|
@ -1162,6 +1190,7 @@ void ScopDetection::releaseMemory() {
|
|||
InsnToMemAcc.clear();
|
||||
BoxedLoopsMap.clear();
|
||||
NonAffineSubRegionMap.clear();
|
||||
RequiredInvariantLoadsMap.clear();
|
||||
|
||||
// Do not clear the invalid function set.
|
||||
}
|
||||
|
|
|
@ -1066,6 +1066,10 @@ void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *GEP) {
|
|||
isl_local_space *LSpace = isl_local_space_from_space(getDomainSpace());
|
||||
Type *Ty = GEP->getPointerOperandType();
|
||||
ScalarEvolution &SE = *Parent.getSE();
|
||||
ScopDetection &SD = Parent.getSD();
|
||||
|
||||
// The set of loads that are required to be invariant.
|
||||
auto &ScopRIL = *SD.getRequiredInvariantLoads(&Parent.getRegion());
|
||||
|
||||
std::vector<const SCEV *> Subscripts;
|
||||
std::vector<int> Sizes;
|
||||
|
@ -1084,7 +1088,16 @@ void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *GEP) {
|
|||
auto Expr = Subscripts[i + IndexOffset];
|
||||
auto Size = Sizes[i];
|
||||
|
||||
if (!isAffineExpr(&Parent.getRegion(), Expr, SE))
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
if (!isAffineExpr(&Parent.getRegion(), Expr, SE, nullptr, &AccessILS))
|
||||
continue;
|
||||
|
||||
bool NonAffine = false;
|
||||
for (LoadInst *LInst : AccessILS)
|
||||
if (!ScopRIL.count(LInst))
|
||||
NonAffine = true;
|
||||
|
||||
if (NonAffine)
|
||||
continue;
|
||||
|
||||
isl_pw_aff *AccessOffset = getPwAff(Expr);
|
||||
|
@ -2398,7 +2411,9 @@ void Scop::hoistInvariantLoads() {
|
|||
|
||||
// TODO: Loads that are not loop carried, hence are in a statement with
|
||||
// zero iterators, are by construction invariant, though we
|
||||
// currently "hoist" them anyway.
|
||||
// currently "hoist" them anyway. This is necessary because we allow
|
||||
// them to be treated as parameters (e.g., in conditions) and our code
|
||||
// generation would otherwise use the old value.
|
||||
|
||||
BasicBlock *BB = Stmt.isBlockStmt() ? Stmt.getBasicBlock()
|
||||
: Stmt.getRegion()->getEntry();
|
||||
|
@ -2452,6 +2467,76 @@ void Scop::hoistInvariantLoads() {
|
|||
|
||||
if (!InvariantAccesses.empty())
|
||||
IsOptimized = true;
|
||||
|
||||
// Check required invariant loads that were tagged during SCoP detection.
|
||||
for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) {
|
||||
assert(LI && getRegion().contains(LI));
|
||||
ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
|
||||
if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) {
|
||||
DEBUG(dbgs() << "\n\nWARNING: Load (" << *LI
|
||||
<< ") is required to be invariant but was not marked as "
|
||||
"such. SCoP for "
|
||||
<< getRegion() << " will be dropped\n\n");
|
||||
addAssumption(isl_set_empty(getParamSpace()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// We want invariant accesses to be sorted in a "natural order" because there
|
||||
// might be dependences between invariant loads. These can be caused by
|
||||
// indirect loads but also because an invariant load is only conditionally
|
||||
// executed and the condition is dependent on another invariant load. As we
|
||||
// want to do code generation in a straight forward way, e.g., preload the
|
||||
// accesses in the list one after another, we sort them such that the
|
||||
// preloaded values needed in the conditions will always be in front. Before
|
||||
// we already ordered the accesses such that indirect loads can be resolved,
|
||||
// thus we use a stable sort here.
|
||||
|
||||
auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0,
|
||||
const InvariantAccessTy &IA1) {
|
||||
Instruction *AI0 = IA0.first->getAccessInstruction();
|
||||
Instruction *AI1 = IA1.first->getAccessInstruction();
|
||||
|
||||
const SCEV *S0 =
|
||||
SE->isSCEVable(AI0->getType()) ? SE->getSCEV(AI0) : nullptr;
|
||||
const SCEV *S1 =
|
||||
SE->isSCEVable(AI1->getType()) ? SE->getSCEV(AI1) : nullptr;
|
||||
|
||||
isl_id *Id0 = getIdForParam(S0);
|
||||
isl_id *Id1 = getIdForParam(S1);
|
||||
|
||||
if (Id0 && !Id1) {
|
||||
isl_id_free(Id0);
|
||||
isl_id_free(Id1);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!Id0) {
|
||||
isl_id_free(Id0);
|
||||
isl_id_free(Id1);
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(Id0 && Id1);
|
||||
|
||||
isl_set *Dom0 = IA0.second;
|
||||
isl_set *Dom1 = IA1.second;
|
||||
|
||||
int Dim0 = isl_set_find_dim_by_id(Dom0, isl_dim_param, Id0);
|
||||
int Dim1 = isl_set_find_dim_by_id(Dom0, isl_dim_param, Id1);
|
||||
|
||||
bool Involves0Id1 = isl_set_involves_dims(Dom0, isl_dim_param, Dim1, 1);
|
||||
bool Involves1Id0 = isl_set_involves_dims(Dom1, isl_dim_param, Dim0, 1);
|
||||
assert(!(Involves0Id1 && Involves1Id0));
|
||||
|
||||
isl_id_free(Id0);
|
||||
isl_id_free(Id1);
|
||||
|
||||
return Involves1Id0;
|
||||
};
|
||||
|
||||
std::stable_sort(InvariantAccesses.begin(), InvariantAccesses.end(),
|
||||
compareInvariantAccesses);
|
||||
}
|
||||
|
||||
const ScopArrayInfo *
|
||||
|
@ -3091,7 +3176,8 @@ extern MapInsnToMemAcc InsnToMemAcc;
|
|||
|
||||
void ScopInfo::buildMemoryAccess(
|
||||
Instruction *Inst, Loop *L, Region *R,
|
||||
const ScopDetection::BoxedLoopsSetTy *BoxedLoops) {
|
||||
const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
|
||||
const InvariantLoadsSetTy &ScopRIL) {
|
||||
unsigned Size;
|
||||
Type *SizeType;
|
||||
Value *Val;
|
||||
|
@ -3138,11 +3224,18 @@ void ScopInfo::buildMemoryAccess(
|
|||
std::vector<const SCEV *> SizesSCEV;
|
||||
|
||||
bool AllAffineSubcripts = true;
|
||||
for (auto Subscript : Subscripts)
|
||||
if (!isAffineExpr(R, Subscript, *SE)) {
|
||||
AllAffineSubcripts = false;
|
||||
for (auto Subscript : Subscripts) {
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
AllAffineSubcripts =
|
||||
isAffineExpr(R, Subscript, *SE, nullptr, &AccessILS);
|
||||
|
||||
for (LoadInst *LInst : AccessILS)
|
||||
if (!ScopRIL.count(LInst))
|
||||
AllAffineSubcripts = false;
|
||||
|
||||
if (!AllAffineSubcripts)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (AllAffineSubcripts && Sizes.size() > 0) {
|
||||
for (auto V : Sizes)
|
||||
|
@ -3176,8 +3269,14 @@ void ScopInfo::buildMemoryAccess(
|
|||
isVariantInNonAffineLoop = true;
|
||||
}
|
||||
|
||||
bool IsAffine = !isVariantInNonAffineLoop &&
|
||||
isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue());
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
bool IsAffine =
|
||||
!isVariantInNonAffineLoop &&
|
||||
isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue(), &AccessILS);
|
||||
|
||||
for (LoadInst *LInst : AccessILS)
|
||||
if (!ScopRIL.count(LInst))
|
||||
IsAffine = false;
|
||||
|
||||
// FIXME: Size of the number of bytes of an array element, not the number of
|
||||
// elements as probably intended here.
|
||||
|
@ -3230,6 +3329,9 @@ void ScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB,
|
|||
// The set of loops contained in non-affine subregions that are part of R.
|
||||
const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R);
|
||||
|
||||
// The set of loads that are required to be invariant.
|
||||
auto &ScopRIL = *SD->getRequiredInvariantLoads(&R);
|
||||
|
||||
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) {
|
||||
Instruction *Inst = I;
|
||||
|
||||
|
@ -3241,12 +3343,21 @@ void ScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB,
|
|||
if (!PHI && IsExitBlock)
|
||||
break;
|
||||
|
||||
// TODO: At this point we only know that elements of ScopRIL have to be
|
||||
// invariant and will be hoisted for the SCoP to be processed. Though,
|
||||
// there might be other invariant accesses that will be hoisted and
|
||||
// that would allow to make a non-affine access affine.
|
||||
if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
|
||||
buildMemoryAccess(Inst, L, &R, BoxedLoops);
|
||||
buildMemoryAccess(Inst, L, &R, BoxedLoops, ScopRIL);
|
||||
|
||||
if (isIgnoredIntrinsic(Inst))
|
||||
continue;
|
||||
|
||||
// Do not build scalar dependences for required invariant loads as we will
|
||||
// hoist them later on anyway or drop the SCoP if we cannot.
|
||||
if (ScopRIL.count(dyn_cast<LoadInst>(Inst)))
|
||||
continue;
|
||||
|
||||
if (buildScalarDependences(Inst, &R, NonAffineSubRegion)) {
|
||||
if (!isa<StoreInst>(Inst))
|
||||
addScalarWriteAccess(Inst);
|
||||
|
|
|
@ -107,7 +107,7 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
|
|||
if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
|
||||
if (!isa<SCEVCouldNotCompute>(Scev)) {
|
||||
const SCEV *NewScev = apply(Scev, LTS, SE);
|
||||
llvm::ValueToValueMap VTV;
|
||||
ValueMapT VTV;
|
||||
VTV.insert(BBMap.begin(), BBMap.end());
|
||||
VTV.insert(GlobalMap.begin(), GlobalMap.end());
|
||||
|
||||
|
@ -728,9 +728,7 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(
|
|||
|
||||
Value *VectorBlockGenerator::generateUnknownStrideLoad(
|
||||
ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
|
||||
__isl_keep isl_id_to_ast_expr *NewAccesses
|
||||
|
||||
) {
|
||||
__isl_keep isl_id_to_ast_expr *NewAccesses) {
|
||||
int VectorWidth = getVectorWidth();
|
||||
auto *Pointer = Load->getPointerOperand();
|
||||
VectorType *VectorType = VectorType::get(
|
||||
|
|
|
@ -142,9 +142,15 @@ public:
|
|||
BasicBlock *StartBlock =
|
||||
executeScopConditionally(S, this, Builder.getTrue());
|
||||
auto SplitBlock = StartBlock->getSinglePredecessor();
|
||||
|
||||
// First generate code for the hoisted invariant loads and transitively the
|
||||
// parameters they reference. Afterwards, for the remaining parameters that
|
||||
// might reference the hoisted loads. Finally, build the runtime check
|
||||
// that might reference both hoisted loads as well as parameters.
|
||||
Builder.SetInsertPoint(SplitBlock->getTerminator());
|
||||
NodeBuilder.addParameters(S.getContext());
|
||||
NodeBuilder.preloadInvariantLoads();
|
||||
NodeBuilder.addParameters(S.getContext());
|
||||
|
||||
Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder());
|
||||
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
|
||||
Builder.SetInsertPoint(StartBlock->begin());
|
||||
|
|
|
@ -834,11 +834,8 @@ void IslNodeBuilder::materializeParameters(isl_set *Set, bool All) {
|
|||
}
|
||||
}
|
||||
|
||||
/// @brief Create the actual preload memory access for @p MA.
|
||||
static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA,
|
||||
isl_ast_build *Build,
|
||||
IslExprBuilder &ExprBuilder) {
|
||||
isl_set *AccessRange = isl_map_range(MA.getAccessRelation());
|
||||
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
|
||||
isl_ast_build *Build) {
|
||||
isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
|
||||
PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext());
|
||||
isl_ast_expr *Access =
|
||||
|
@ -850,15 +847,19 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
|
|||
isl_set *Domain,
|
||||
isl_ast_build *Build) {
|
||||
|
||||
isl_set *AccessRange = isl_map_range(MA.getAccessRelation());
|
||||
materializeParameters(AccessRange, false);
|
||||
|
||||
isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
|
||||
bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
|
||||
isl_set_free(Universe);
|
||||
|
||||
if (AlwaysExecuted) {
|
||||
isl_set_free(Domain);
|
||||
return createPreloadLoad(S, MA, Build, ExprBuilder);
|
||||
return preloadUnconditionally(AccessRange, Build);
|
||||
} else {
|
||||
|
||||
materializeParameters(Domain, false);
|
||||
isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
|
||||
|
||||
Value *Cond = ExprBuilder.create(DomainCond);
|
||||
|
@ -891,7 +892,7 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
|
|||
Builder.SetInsertPoint(ExecBB->getTerminator());
|
||||
Instruction *AccInst = MA.getAccessInstruction();
|
||||
Type *AccInstTy = AccInst->getType();
|
||||
Value *PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder);
|
||||
Value *PreAccInst = preloadUnconditionally(AccessRange, Build);
|
||||
|
||||
Builder.SetInsertPoint(MergeBB->getTerminator());
|
||||
auto *MergePHI = Builder.CreatePHI(
|
||||
|
@ -994,5 +995,5 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
|
|||
Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
|
||||
Instruction *InsertLocation = --(Builder.GetInsertBlock()->end());
|
||||
return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
|
||||
InsertLocation);
|
||||
InsertLocation, &ValueMap);
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace polly;
|
||||
|
||||
#define DEBUG_TYPE "polly-scev-validator"
|
||||
|
||||
|
@ -125,10 +126,12 @@ private:
|
|||
const Region *R;
|
||||
ScalarEvolution &SE;
|
||||
const Value *BaseAddress;
|
||||
InvariantLoadsSetTy *ILS;
|
||||
|
||||
public:
|
||||
SCEVValidator(const Region *R, ScalarEvolution &SE, const Value *BaseAddress)
|
||||
: R(R), SE(SE), BaseAddress(BaseAddress) {}
|
||||
SCEVValidator(const Region *R, ScalarEvolution &SE, const Value *BaseAddress,
|
||||
InvariantLoadsSetTy *ILS)
|
||||
: R(R), SE(SE), BaseAddress(BaseAddress), ILS(ILS) {}
|
||||
|
||||
class ValidatorResult visitConstant(const SCEVConstant *Constant) {
|
||||
return ValidatorResult(SCEVType::INT);
|
||||
|
@ -335,6 +338,15 @@ public:
|
|||
return ValidatorResult(SCEVType::PARAM, S);
|
||||
}
|
||||
|
||||
ValidatorResult visitLoadInstruction(Instruction *I, const SCEV *S) {
|
||||
if (R->contains(I) && ILS) {
|
||||
ILS->insert(cast<LoadInst>(I));
|
||||
return ValidatorResult(SCEVType::PARAM, S);
|
||||
}
|
||||
|
||||
return visitGenericInst(I, S);
|
||||
}
|
||||
|
||||
ValidatorResult visitSDivInstruction(Instruction *SDiv, const SCEV *S) {
|
||||
assert(SDiv->getOpcode() == Instruction::SDiv &&
|
||||
"Assumed SDiv instruction!");
|
||||
|
@ -391,6 +403,8 @@ public:
|
|||
|
||||
if (Instruction *I = dyn_cast<Instruction>(Expr->getValue())) {
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Load:
|
||||
return visitLoadInstruction(I, Expr);
|
||||
case Instruction::SDiv:
|
||||
return visitSDivInstruction(I, Expr);
|
||||
case Instruction::SRem:
|
||||
|
@ -550,11 +564,11 @@ bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) {
|
|||
}
|
||||
|
||||
bool isAffineExpr(const Region *R, const SCEV *Expr, ScalarEvolution &SE,
|
||||
const Value *BaseAddress) {
|
||||
const Value *BaseAddress, InvariantLoadsSetTy *ILS) {
|
||||
if (isa<SCEVCouldNotCompute>(Expr))
|
||||
return false;
|
||||
|
||||
SCEVValidator Validator(R, SE, BaseAddress);
|
||||
SCEVValidator Validator(R, SE, BaseAddress, ILS);
|
||||
DEBUG({
|
||||
dbgs() << "\n";
|
||||
dbgs() << "Expr: " << *Expr << "\n";
|
||||
|
@ -580,7 +594,8 @@ std::vector<const SCEV *> getParamsInAffineExpr(const Region *R,
|
|||
if (isa<SCEVCouldNotCompute>(Expr))
|
||||
return std::vector<const SCEV *>();
|
||||
|
||||
SCEVValidator Validator(R, SE, BaseAddress);
|
||||
InvariantLoadsSetTy ILS;
|
||||
SCEVValidator Validator(R, SE, BaseAddress, &ILS);
|
||||
ValidatorResult Result = Validator.visit(Expr);
|
||||
assert(Result.isValid() && "Requested parameters for an invalid SCEV!");
|
||||
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
#include "polly/Support/ScopHelper.h"
|
||||
#include "polly/Options.h"
|
||||
#include "polly/ScopInfo.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
|
@ -240,8 +239,6 @@ void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) {
|
|||
struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> {
|
||||
friend struct SCEVVisitor<ScopExpander, const SCEV *>;
|
||||
|
||||
typedef llvm::DenseMap<const llvm::Value *, llvm::Value *> ValueMapT;
|
||||
|
||||
explicit ScopExpander(const Region &R, ScalarEvolution &SE,
|
||||
const DataLayout &DL, const char *Name, ValueMapT *VMap)
|
||||
: Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R),
|
||||
|
@ -342,10 +339,9 @@ private:
|
|||
///}
|
||||
};
|
||||
|
||||
Value *
|
||||
polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
|
||||
const char *Name, const SCEV *E, Type *Ty, Instruction *IP,
|
||||
llvm::DenseMap<const llvm::Value *, llvm::Value *> *VMap) {
|
||||
Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
|
||||
const char *Name, const SCEV *E, Type *Ty,
|
||||
Instruction *IP, ValueMapT *VMap) {
|
||||
ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap);
|
||||
return Expander.expandCodeFor(E, Ty, IP);
|
||||
}
|
||||
|
@ -383,3 +379,16 @@ Value *polly::getConditionFromTerminator(TerminatorInst *TI) {
|
|||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
|
||||
ScalarEvolution &SE) {
|
||||
Loop *L = LI.getLoopFor(LInst->getParent());
|
||||
const SCEV *PtrSCEV = SE.getSCEVAtScope(LInst->getPointerOperand(), L);
|
||||
while (L && R.contains(L)) {
|
||||
if (!SE.isLoopInvariant(PtrSCEV, L))
|
||||
return false;
|
||||
L = L->getParentLoop();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -polly-process-unprofitable -S < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.begin:
|
||||
; CHECK-NEXT: %polly.access.BPLoc = getelementptr i32*, i32** %BPLoc, i64 0
|
||||
; CHECK-NEXT: %polly.access.BPLoc.load = load i32*, i32** %polly.access.BPLoc
|
||||
;
|
||||
; CHECK-LABEL: polly.stmt.bb2:
|
||||
; CHECK-NEXT: %p_tmp3 = getelementptr inbounds i32, i32* %polly.access.BPLoc.load, i64 %polly.indvar
|
||||
;
|
||||
; void f(int **BPLoc) {
|
||||
; for (int i = 0; i < 1024; i++)
|
||||
; (*BPLoc)[i] = 0;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32** %BPLoc) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb4, %bb
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
|
||||
%exitcond = icmp ne i64 %indvars.iv, 1024
|
||||
br i1 %exitcond, label %bb2, label %bb5
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = load i32*, i32** %BPLoc, align 8
|
||||
%tmp3 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp3, align 4
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb2
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %bb1
|
||||
|
||||
bb5: ; preds = %bb1
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -polly-process-unprofitable -S < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.begin:
|
||||
; CHECK-NEXT: %0 = sext i32 %N to i64
|
||||
; CHECK-NEXT: %1 = icmp sge i64 %0, 514
|
||||
; CHECK-NEXT: br label %polly.preload.cond
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.cond:
|
||||
; CHECK-NEXT: br i1 %1, label %polly.preload.exec, label %polly.preload.merge
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.merge:
|
||||
; CHECK-NEXT: %polly.preload.tmp6.merge = phi i32* [ %polly.access.BPLoc.load, %polly.preload.exec ], [ null, %polly.preload.cond ]
|
||||
;
|
||||
; CHECK-LABEL: polly.stmt.bb5:
|
||||
; CHECK-NEXT: %p_tmp7 = getelementptr inbounds i32, i32* %polly.preload.tmp6.merge, i64 %polly.indvar6
|
||||
;
|
||||
; void f(int **BPLoc, int *A, int N) {
|
||||
; for (int i = 0; i < N; i++)
|
||||
; if (i > 512)
|
||||
; (*BPLoc)[i] = 0;
|
||||
; else
|
||||
; A[i] = 0;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32** %BPLoc, i32* %A, i32 %N) {
|
||||
bb:
|
||||
%tmp = sext i32 %N to i64
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb11, %bb
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ]
|
||||
%tmp2 = icmp slt i64 %indvars.iv, %tmp
|
||||
br i1 %tmp2, label %bb3, label %bb12
|
||||
|
||||
bb3: ; preds = %bb1
|
||||
%tmp4 = icmp sgt i64 %indvars.iv, 512
|
||||
br i1 %tmp4, label %bb5, label %bb8
|
||||
|
||||
bb5: ; preds = %bb3
|
||||
%tmp6 = load i32*, i32** %BPLoc, align 8
|
||||
%tmp7 = getelementptr inbounds i32, i32* %tmp6, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp7, align 4
|
||||
br label %bb10
|
||||
|
||||
bb8: ; preds = %bb3
|
||||
%tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp9, align 4
|
||||
br label %bb10
|
||||
|
||||
bb10: ; preds = %bb8, %bb5
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb10
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %bb1
|
||||
|
||||
bb12: ; preds = %bb1
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen -S < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.begin:
|
||||
; CHECK-NEXT: %polly.access.C = getelementptr i32, i32* %C, i64 0
|
||||
; CHECK-NEXT: %polly.access.C.load = load i32, i32* %polly.access.C
|
||||
; CHECK-NOT: %polly.access.C.load = load i32, i32* %polly.access.C
|
||||
;
|
||||
; CHECK: polly.cond
|
||||
; CHECK: %[[R0:[0-9]*]] = sext i32 %polly.access.C.load to i64
|
||||
; CHECK: %[[R1:[0-9]*]] = icmp sle i64 %[[R0]], -1
|
||||
;
|
||||
; CHECK: polly.cond
|
||||
; CHECK: %[[R2:[0-9]*]] = sext i32 %polly.access.C.load to i64
|
||||
; CHECK: %[[R3:[0-9]*]] = icmp sge i64 %[[R2]], 1
|
||||
;
|
||||
; CHECK-NOT: polly.stmt.bb2
|
||||
;
|
||||
; void f(int *A, int *C) {
|
||||
; for (int i = 0; i < 1024; i++)
|
||||
; if (*C)
|
||||
; A[i] = 0;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32* %A, i32* %C) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb7, %bb
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
|
||||
%exitcond = icmp ne i64 %indvars.iv, 1024
|
||||
br i1 %exitcond, label %bb2, label %bb8
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = load i32, i32* %C, align 4
|
||||
%tmp3 = icmp eq i32 %tmp, 0
|
||||
br i1 %tmp3, label %bb6, label %bb4
|
||||
|
||||
bb4: ; preds = %bb2
|
||||
%tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp5, align 4
|
||||
br label %bb6
|
||||
|
||||
bb6: ; preds = %bb2, %bb4
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb6
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %bb1
|
||||
|
||||
bb8: ; preds = %bb1
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-process-unprofitable -S < %s | FileCheck %s
|
||||
;
|
||||
; void fence(void);
|
||||
;
|
||||
; void f(int *A, int *B) {
|
||||
; int i = 0;
|
||||
; int x = 0;
|
||||
;
|
||||
; do {
|
||||
; x = *B;
|
||||
; S: A[i] += x;
|
||||
; } while (i++ < 100);
|
||||
;
|
||||
; fence();
|
||||
;
|
||||
; do {
|
||||
; P: A[i]++;
|
||||
; } while (i++ < x / 2);
|
||||
; }
|
||||
;
|
||||
; CHECK: polly.start:
|
||||
; CHECK-NEXT: sext i32 %tmp.merge to i64
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32* %A, i32* %B) {
|
||||
entry:
|
||||
br label %stmt.S
|
||||
|
||||
stmt.S: ; preds = %do.cond, %entry
|
||||
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %do.cond ], [ 0, %entry ]
|
||||
%tmp = load i32, i32* %B, align 4
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv2
|
||||
%tmp4 = load i32, i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %tmp4, %tmp
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
br label %do.cond
|
||||
|
||||
do.cond: ; preds = %do.body
|
||||
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next3, 101
|
||||
br i1 %exitcond, label %stmt.S, label %do.end
|
||||
|
||||
do.end: ; preds = %do.cond
|
||||
%tmp5 = trunc i64 101 to i32
|
||||
call void @fence() #2
|
||||
%tmp6 = sext i32 %tmp5 to i64
|
||||
br label %stmt.P
|
||||
|
||||
stmt.P: ; preds = %do.cond.5, %do.end
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %do.cond.5 ], [ %tmp6, %do.end ]
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%tmp7 = load i32, i32* %arrayidx3, align 4
|
||||
%inc4 = add nsw i32 %tmp7, 1
|
||||
store i32 %inc4, i32* %arrayidx3, align 4
|
||||
br label %do.cond.5
|
||||
|
||||
do.cond.5: ; preds = %do.body.1
|
||||
%div = sdiv i32 %tmp, 2
|
||||
%tmp8 = sext i32 %div to i64
|
||||
%cmp7 = icmp slt i64 %indvars.iv, %tmp8
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
br i1 %cmp7, label %stmt.P, label %do.end.8
|
||||
|
||||
do.end.8: ; preds = %do.cond.5
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @fence()
|
|
@ -0,0 +1,34 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-process-unprofitable -S < %s | FileCheck %s
|
||||
;
|
||||
; CHECK: polly.start
|
||||
;
|
||||
; void f(int *A, int *UB) {
|
||||
; for (int i = 0; i < *UB; i++)
|
||||
; A[i] = 0;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32* %A, i32* %UB) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb6, %bb
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %bb6 ], [ 0, %bb ]
|
||||
%tmp = load i32, i32* %UB, align 4
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp3 = icmp slt i64 %indvars.iv, %tmp2
|
||||
br i1 %tmp3, label %bb4, label %bb7
|
||||
|
||||
bb4: ; preds = %bb1
|
||||
%tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp5, align 4
|
||||
br label %bb6
|
||||
|
||||
bb6: ; preds = %bb4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %bb1
|
||||
|
||||
bb7: ; preds = %bb1
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP
|
||||
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
|
||||
;
|
||||
; This caused the code generation to emit a broken module as there are two
|
||||
; dependences that need to be considered, thus code has to be emitted in a
|
||||
; certain order:
|
||||
; 1) To preload A[N * M] the expression N * M [p0] is needed (both for the
|
||||
; condition under which A[N * M] is executed as well as to compute the
|
||||
; index).
|
||||
; 2) To generate (A[N * M] / 2) [p1] the preloaded value is needed.
|
||||
;
|
||||
; SCOP: p0: (%N * %M)
|
||||
; SCOP: p1: (zext i32 (%tmp4 /u 2) to i64)
|
||||
;
|
||||
; CHECK: polly.preload.merge:
|
||||
; CHECK: %polly.preload.tmp4.merge = phi i32 [ %polly.access.A.load, %polly.preload.exec ], [ 0, %polly.preload.cond ]
|
||||
; CHECK: %3 = lshr i32 %polly.preload.tmp4.merge, 1
|
||||
; CHECK: %4 = zext i32 %3 to i64
|
||||
;
|
||||
; void f(int *restrict A, int *restrict B, int N, int M) {
|
||||
;
|
||||
; for (int i = 0; i < N * M; i++)
|
||||
; for (int j = 0; j < A[N * M] / 2; j++)
|
||||
; B[i + j]++;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32* noalias %A, i32* noalias %B, i32 %N, i32 %M) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc.8, %entry
|
||||
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc.8 ], [ 0, %entry ]
|
||||
%mul = mul nsw i32 %N, %M
|
||||
%tmp = sext i32 %mul to i64
|
||||
%cmp = icmp slt i64 %indvars.iv2, %tmp
|
||||
br i1 %cmp, label %for.body, label %for.end.10
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond.1
|
||||
|
||||
for.cond.1: ; preds = %for.inc, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ]
|
||||
%mul2 = mul nsw i32 %N, %M
|
||||
%idxprom = sext i32 %mul2 to i64
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
|
||||
%tmp4 = load i32, i32* %arrayidx, align 4
|
||||
%div = udiv i32 %tmp4, 2
|
||||
%tmp5 = sext i32 %div to i64
|
||||
%cmp3 = icmp slt i64 %indvars.iv, %tmp5
|
||||
br i1 %cmp3, label %for.body.4, label %for.end
|
||||
|
||||
for.body.4: ; preds = %for.cond.1
|
||||
%tmp6 = add nsw i64 %indvars.iv2, %indvars.iv
|
||||
%arrayidx6 = getelementptr inbounds i32, i32* %B, i64 %tmp6
|
||||
%tmp7 = load i32, i32* %arrayidx6, align 4
|
||||
%inc = add nsw i32 %tmp7, 1
|
||||
store i32 %inc, i32* %arrayidx6, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body.4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %for.cond.1
|
||||
|
||||
for.end: ; preds = %for.cond.1
|
||||
br label %for.inc.8
|
||||
|
||||
for.inc.8: ; preds = %for.end
|
||||
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end.10: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen -polly-ignore-aliasing -S < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.begin:
|
||||
; CHECK: %polly.access.A = getelementptr i32**, i32*** %A, i64 42
|
||||
; CHECK: %polly.access.A.load = load i32**, i32*** %polly.access.A
|
||||
; CHECK: %polly.access.polly.access.A.load = getelementptr i32*, i32** %polly.access.A.load, i64 32
|
||||
; CHECK: %polly.access.polly.access.A.load.load = load i32*, i32** %polly.access.polly.access.A.load
|
||||
;
|
||||
; CHECK: polly.stmt.bb2:
|
||||
; CHECK: %p_tmp6 = getelementptr inbounds i32, i32* %polly.access.polly.access.A.load.load, i64 %polly.indvar
|
||||
; CHECK: store i32 0, i32* %p_tmp6, align 4
|
||||
;
|
||||
; void f(int ***A) {
|
||||
; for (int i = 0; i < 1024; i++)
|
||||
; A[42][32][i] = 0;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32*** %A) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb7, %bb
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
|
||||
%exitcond = icmp ne i64 %indvars.iv, 1024
|
||||
br i1 %exitcond, label %bb2, label %bb8
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = getelementptr inbounds i32**, i32*** %A, i64 42
|
||||
%tmp3 = load i32**, i32*** %tmp, align 8
|
||||
%tmp4 = getelementptr inbounds i32*, i32** %tmp3, i64 32
|
||||
%tmp5 = load i32*, i32** %tmp4, align 8
|
||||
%tmp6 = getelementptr inbounds i32, i32* %tmp5, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp6, align 4
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb2
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %bb1
|
||||
|
||||
bb8: ; preds = %bb1
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-ignore-aliasing -polly-process-unprofitable -S < %s | FileCheck %s
|
||||
;
|
||||
; CHECK-LABEL: polly.preload.begin:
|
||||
; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0
|
||||
; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B
|
||||
;
|
||||
; CHECK-LABEL: polly.stmt.bb2.split:
|
||||
; CHECK: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar
|
||||
; CHECK: store i32 %polly.access.B.load, i32* %scevgep, align 4
|
||||
;
|
||||
; void f(int *restrict A, int *restrict B) {
|
||||
; for (int i = 0; i < 1024; i++)
|
||||
; auto tmp = *B;
|
||||
; // Split BB
|
||||
; A[i] = tmp;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32* noalias %A, i32* noalias %B) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb4, %bb
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
|
||||
%exitcond = icmp ne i64 %indvars.iv, 1024
|
||||
br i1 %exitcond, label %bb2, label %bb5
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = load i32, i32* %B, align 4
|
||||
br label %bb2.split
|
||||
|
||||
bb2.split:
|
||||
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
store i32 %tmp, i32* %tmp3, align 4
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb2
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %bb1
|
||||
|
||||
bb5: ; preds = %bb1
|
||||
ret void
|
||||
}
|
|
@ -88,5 +88,15 @@ if.end: ; preds = %if.then, %for.end
|
|||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
||||
|
||||
; CHECK: for (int c0 = 0; c0 <= 1018; c0 += 1)
|
||||
; CHECK: Stmt_for_body(c0);
|
||||
; Negative test. At the moment we will optimistically assume RED[0] in the conditional after the
|
||||
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
|
||||
; during SCoP generation we will realize that RED[0] is in fact not invariant and bail.
|
||||
;
|
||||
; Possible solutions could be:
|
||||
; - Do not optimistically assume it to be invariant (as before this commit), however we would loose
|
||||
; a lot of invariant cases due to possible aliasing.
|
||||
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
|
||||
; rejecting the whole region.
|
||||
;
|
||||
; CHECK-NOT: for (int c0 = 0; c0 <= 1018; c0 += 1)
|
||||
; CHECK-NOT: Stmt_for_body(c0);
|
||||
|
|
|
@ -29,7 +29,7 @@ return:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: base_pointer_in_condition
|
||||
; CHECK: Valid Region for Scop: for.i => then
|
||||
; CHECK: Valid Region for Scop: pre => return
|
||||
|
||||
define void @base_pointer_is_argument(float* %A, i64 %n) {
|
||||
entry:
|
||||
|
@ -292,4 +292,4 @@ exit:
|
|||
}
|
||||
|
||||
; CHECK: base_pointer_is_ptr2ptr
|
||||
; CHECK-NOT: Valid Region for Scop
|
||||
; CHECK: Valid Region for Scop: for.j => for.i.inc
|
||||
|
|
|
@ -15,9 +15,9 @@
|
|||
; PROFIT-NOT: Valid
|
||||
;
|
||||
; void f(int * restrict A, int * restrict C) {
|
||||
; int j;
|
||||
; int j = 0;
|
||||
; for (int i = 0; i < 1024; i++) {
|
||||
; while ((j = C[i]))
|
||||
; while ((j = C[j]))
|
||||
; A[j]++;
|
||||
; }
|
||||
; }
|
||||
|
@ -37,7 +37,8 @@ bb2: ; preds = %bb1
|
|||
br label %bb3
|
||||
|
||||
bb3: ; preds = %bb6, %bb2
|
||||
%tmp = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
|
||||
%indvars.j = phi i32 [ %tmp4, %bb6 ], [ 0, %bb2 ]
|
||||
%tmp = getelementptr inbounds i32, i32* %C, i32 %indvars.j
|
||||
%tmp4 = load i32, i32* %tmp, align 4
|
||||
%tmp5 = icmp eq i32 %tmp4, 0
|
||||
br i1 %tmp5, label %bb11, label %bb6
|
||||
|
|
|
@ -14,7 +14,8 @@ bb:
|
|||
|
||||
loop:
|
||||
%indvar = phi i64 [ %indvar.next, %loop ], [ 0, %bb ]
|
||||
%tmp12 = load i64, i64* %tmp1
|
||||
%gep = getelementptr inbounds i64, i64* %tmp1, i64 %indvar
|
||||
%tmp12 = load i64, i64* %gep
|
||||
%tmp13 = mul nsw i64 %tmp12, %tmp4
|
||||
%ptr = getelementptr inbounds float, float* %B, i64 %tmp13
|
||||
%val = load float, float* %ptr
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
; RUN: | FileCheck %s --check-prefix=ALLOWNONAFFINEALL
|
||||
|
||||
; void f(int A[], int n) {
|
||||
; for (int i = 0; i < A[n]; i++)
|
||||
; for (int i = 0; i < A[n+i]; i++)
|
||||
; A[i] = 0;
|
||||
; }
|
||||
|
||||
|
@ -62,7 +62,8 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
|||
%inc = trunc i64 %1 to i32, !dbg !21
|
||||
store i32 0, i32* %arrayidx2, align 4, !dbg !24
|
||||
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !18, metadata !DIExpression()), !dbg !20
|
||||
%2 = load i32, i32* %arrayidx, align 4, !dbg !21
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %arrayidx, i64 %indvar, !dbg !21
|
||||
%2 = load i32, i32* %arrayidx3, align 4, !dbg !21
|
||||
%cmp = icmp slt i32 %inc, %2, !dbg !21
|
||||
%indvar.next = add i64 %indvar, 1, !dbg !21
|
||||
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
;
|
||||
; void a(struct b *A) {
|
||||
; for (int i=0; i<32; i++)
|
||||
; A->b[i] = 0;
|
||||
; A[i].b[i] = 0;
|
||||
; }
|
||||
|
||||
; CHECK: remark: ReportVariantBasePtr01.c:6:8: The following errors keep this region from being a Scop.
|
||||
|
@ -23,11 +23,11 @@ entry:
|
|||
entry.split: ; preds = %entry
|
||||
tail call void @llvm.dbg.value(metadata %struct.b* %A, i64 0, metadata !16, metadata !DIExpression()), !dbg !23
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !25
|
||||
%b = getelementptr inbounds %struct.b, %struct.b* %A, i64 0, i32 0, !dbg !26
|
||||
br label %for.body, !dbg !27
|
||||
|
||||
for.body: ; preds = %for.body, %entry.split
|
||||
%indvar4 = phi i64 [ %indvar.next, %for.body ], [ 0, %entry.split ]
|
||||
%b = getelementptr inbounds %struct.b, %struct.b* %A, i64 %indvar4, i32 0, !dbg !26
|
||||
%0 = mul i64 %indvar4, 4, !dbg !26
|
||||
%1 = add i64 %0, 3, !dbg !26
|
||||
%2 = add i64 %0, 2, !dbg !26
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
|
||||
;
|
||||
; void f(int *A, int *B, int *C) {
|
||||
; for (int i = 0; i < 1000; i++)
|
||||
; if (A[i] == *B)
|
||||
; A[i] = *C;
|
||||
; }
|
||||
;
|
||||
; Check that only the access to *B is hoisted but not the one to *C.
|
||||
;
|
||||
; CHECK: Invariant Accesses: {
|
||||
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK: { Stmt_for_body__TO__if_end[i0] -> MemRef_B[0] };
|
||||
; CHECK: Execution Context: { : }
|
||||
; CHECK: }
|
||||
;
|
||||
; CHECK: Statements {
|
||||
; CHECK: Stmt_for_body__TO__if_end
|
||||
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK: { Stmt_for_body__TO__if_end[i0] -> MemRef_C[0] };
|
||||
; CHECK: }
|
||||
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f(i32* %A, i32* %B, i32* %C) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
|
||||
%exitcond = icmp ne i64 %indvars.iv, 1000
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%tmp = load i32, i32* %arrayidx, align 4
|
||||
%tmp1 = load i32, i32* %B, align 4
|
||||
%cmp1 = icmp eq i32 %tmp, %tmp1
|
||||
br i1 %cmp1, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%tmp2 = load i32, i32* %C, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
store i32 %tmp2, i32* %arrayidx3, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %for.body
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.end
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
|
@ -39,8 +39,10 @@
|
|||
; void f(int * restrict A, int * restrict C) {
|
||||
; int j;
|
||||
; for (int i = 0; i < 1024; i++) {
|
||||
; while ((j = C[i]))
|
||||
; while ((j = C[i++])) {
|
||||
; A[j]++;
|
||||
; if (true) break;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
;
|
||||
|
@ -70,7 +72,7 @@ bb6: ; preds = %bb3
|
|||
%tmp9 = load i32, i32* %tmp8, align 4
|
||||
%tmp10 = add nsw i32 %tmp9, 1
|
||||
store i32 %tmp10, i32* %tmp8, align 4
|
||||
br label %bb3
|
||||
br i1 true, label %bb11, label %bb3
|
||||
|
||||
bb11: ; preds = %bb3
|
||||
br label %bb12
|
||||
|
|
|
@ -6,35 +6,19 @@
|
|||
; RUN: -analyze < %s | FileCheck %s \
|
||||
; RUN: --check-prefix=ALL
|
||||
;
|
||||
; INNERMOST: Function: f
|
||||
; INNERMOST: Region: %bb9---%bb17
|
||||
; INNERMOST: Max Loop Depth: 1
|
||||
; INNERMOST: Context:
|
||||
; INNERMOST: [N] -> { :
|
||||
; INNERMOST-DAG: N >= -2147483648
|
||||
; INNERMOST-DAG: and
|
||||
; INNERMOST-DAG: N <= 2147483647
|
||||
; INNERMOST }
|
||||
; INNERMOST: Assumed Context:
|
||||
; INNERMOST: [N] -> { : }
|
||||
; INNERMOST: p0: %N
|
||||
; INNERMOST: Alias Groups (0):
|
||||
; INNERMOST: n/a
|
||||
; INNERMOST: Statements {
|
||||
; INNERMOST: Stmt_bb11
|
||||
; INNERMOST: Domain :=
|
||||
; INNERMOST: [N] -> { Stmt_bb11[i0] :
|
||||
; INNERMOST-DAG: i0 >= 0
|
||||
; INNERMOST-DAG: and
|
||||
; INNERMOST-DAG: i0 <= -1 + N
|
||||
; INNERMOST: }
|
||||
; INNERMOST: Schedule :=
|
||||
; INNERMOST: [N] -> { Stmt_bb11[i0] -> [i0] };
|
||||
; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0]
|
||||
; INNERMOST: [N] -> { Stmt_bb11[i0] -> MemRef_A[i0] };
|
||||
; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
|
||||
; INNERMOST: [N] -> { Stmt_bb11[i0] -> MemRef_A[i0] };
|
||||
; INNERMOST: }
|
||||
; Negative test for INNERMOST.
|
||||
; At the moment we will optimistically assume A[i] in the conditional before the inner
|
||||
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
|
||||
; during SCoP generation we will realize that A[i] is in fact not invariant (in this region = the body
|
||||
; of the outer loop) and bail.
|
||||
;
|
||||
; Possible solutions could be:
|
||||
; - Do not optimistically assume it to be invariant (as before this commit), however we would loose
|
||||
; a lot of invariant cases due to possible aliasing.
|
||||
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
|
||||
; rejecting the whole region.
|
||||
;
|
||||
; INNERMOST-NOT: Function: f
|
||||
;
|
||||
; ALL: Function: f
|
||||
; ALL: Region: %bb3---%bb19
|
||||
|
|
|
@ -10,35 +10,19 @@
|
|||
; RUN: -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true \
|
||||
; RUN: -analyze < %s | FileCheck %s --check-prefix=PROFIT
|
||||
;
|
||||
; INNERMOST: Function: f
|
||||
; INNERMOST: Region: %bb9---%bb18
|
||||
; INNERMOST: Max Loop Depth: 1
|
||||
; INNERMOST: Context:
|
||||
; INNERMOST: [p_0] -> { :
|
||||
; INNERMOST-DAG: p_0 >= -2199023255552
|
||||
; INNERMOST-DAG: and
|
||||
; INNERMOST-DAG: p_0 <= 2199023254528
|
||||
; INNERMOST: }
|
||||
; INNERMOST: Assumed Context:
|
||||
; INNERMOST: [p_0] -> { : }
|
||||
; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3>
|
||||
; INNERMOST: Alias Groups (0):
|
||||
; INNERMOST: n/a
|
||||
; INNERMOST: Statements {
|
||||
; INNERMOST: Stmt_bb12
|
||||
; INNERMOST: Domain :=
|
||||
; INNERMOST: [p_0] -> { Stmt_bb12[i0] :
|
||||
; INNERMOST-DAG: i0 >= 0
|
||||
; INNERMOST-DAG: and
|
||||
; INNERMOST-DAG: i0 <= -1 + p_0
|
||||
; INNERMOST: }
|
||||
; INNERMOST: Schedule :=
|
||||
; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> [i0] };
|
||||
; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0]
|
||||
; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> MemRef_A[i0] };
|
||||
; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
|
||||
; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> MemRef_A[i0] };
|
||||
; INNERMOST: }
|
||||
; Negative test for INNERMOST.
|
||||
; At the moment we will optimistically assume A[i] in the conditional before the inner
|
||||
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
|
||||
; during SCoP generation we will realize that A[i] is in fact not invariant (in this region = the body
|
||||
; of the outer loop) and bail.
|
||||
;
|
||||
; Possible solutions could be:
|
||||
; - Do not optimistically assume it to be invariant (as before this commit), however we would loose
|
||||
; a lot of invariant cases due to possible aliasing.
|
||||
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
|
||||
; rejecting the whole region.
|
||||
;
|
||||
; INNERMOST-NOT: Function: f
|
||||
;
|
||||
; ALL: Function: f
|
||||
; ALL: Region: %bb3---%bb20
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-process-unprofitable -analyze < %s | FileCheck %s
;
; The load of *BPLoc is not loop carried, so Polly hoists it and models it
; as an invariant access instead of rejecting the region.
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_BPLoc[0] };
;
;    void f(int **BPLoc) {
;      for (int i = 0; i < 1024; i++)
;        (*BPLoc)[i] = 0;
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32** %BPLoc) {
bb:
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
  %exitcond = icmp ne i64 %indvars.iv, 1024
  br i1 %exitcond, label %bb2, label %bb5

bb2:                                              ; preds = %bb1
  ; The invariant load: the base pointer of the store below.
  %tmp = load i32*, i32** %BPLoc, align 8
  %tmp3 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv
  store i32 0, i32* %tmp3, align 4
  br label %bb4

bb4:                                              ; preds = %bb2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %bb1

bb5:                                              ; preds = %bb1
  ret void
}
|
|
@ -0,0 +1,51 @@
|
|||
; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-process-unprofitable -analyze < %s | FileCheck %s
;
; Same as the plain *BPLoc case, but the invariant load only executes under
; the condition i > 512; the access is still hoisted (here from Stmt_bb5).
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BPLoc[0] };
;
;    void f(int **BPLoc, int *A, int N) {
;      for (int i = 0; i < N; i++)
;        if (i > 512)
;          (*BPLoc)[i] = 0;
;        else
;          A[i] = 0;
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32** %BPLoc, i32* %A, i32 %N) {
bb:
  %tmp = sext i32 %N to i64
  br label %bb1

bb1:                                              ; preds = %bb11, %bb
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ]
  %tmp2 = icmp slt i64 %indvars.iv, %tmp
  br i1 %tmp2, label %bb3, label %bb12

bb3:                                              ; preds = %bb1
  %tmp4 = icmp sgt i64 %indvars.iv, 512
  br i1 %tmp4, label %bb5, label %bb8

bb5:                                              ; preds = %bb3
  ; Conditionally executed invariant load of *BPLoc.
  %tmp6 = load i32*, i32** %BPLoc, align 8
  %tmp7 = getelementptr inbounds i32, i32* %tmp6, i64 %indvars.iv
  store i32 0, i32* %tmp7, align 4
  br label %bb10

bb8:                                              ; preds = %bb3
  %tmp9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 0, i32* %tmp9, align 4
  br label %bb10

bb10:                                             ; preds = %bb8, %bb5
  br label %bb11

bb11:                                             ; preds = %bb10
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %bb1

bb12:                                             ; preds = %bb1
  ret void
}
|
|
@ -0,0 +1,43 @@
|
|||
; RUN: opt %loadPolly -polly-process-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; The load of *C feeds the branch condition inside the loop; it is hoistable
; and therefore modeled as an invariant access.
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_C[0] };
;
;    void f(int *A, int *C) {
;      for (int i = 0; i < 1024; i++)
;        if (*C)
;          A[i] = 0;
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32* %A, i32* %C) {
bb:
  br label %bb1

bb1:                                              ; preds = %bb7, %bb
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
  %exitcond = icmp ne i64 %indvars.iv, 1024
  br i1 %exitcond, label %bb2, label %bb8

bb2:                                              ; preds = %bb1
  ; Invariant load used only in the branch condition.
  %tmp = load i32, i32* %C, align 4
  %tmp3 = icmp eq i32 %tmp, 0
  br i1 %tmp3, label %bb6, label %bb4

bb4:                                              ; preds = %bb2
  %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 0, i32* %tmp5, align 4
  br label %bb6

bb6:                                              ; preds = %bb2, %bb4
  br label %bb7

bb7:                                              ; preds = %bb6
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %bb1

bb8:                                              ; preds = %bb1
  ret void
}
|
|
@ -0,0 +1,36 @@
|
|||
; RUN: opt %loadPolly -polly-scops -polly-process-unprofitable -analyze < %s | FileCheck %s
;
; The load of *UB is the loop upper bound. It sits in the loop header
; (Stmt_bb1) but is not loop carried, so it is treated as invariant.
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb1[i0] -> MemRef_UB[0] };
;
;    void f(int *A, int *UB) {
;      for (int i = 0; i < *UB; i++)
;        A[i] = 0;
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32* %A, i32* %UB) {
bb:
  br label %bb1

bb1:                                              ; preds = %bb6, %bb
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb6 ], [ 0, %bb ]
  ; Invariant load that defines the trip count.
  %tmp = load i32, i32* %UB, align 4
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = icmp slt i64 %indvars.iv, %tmp2
  br i1 %tmp3, label %bb4, label %bb7

bb4:                                              ; preds = %bb1
  %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 0, i32* %tmp5, align 4
  br label %bb6

bb6:                                              ; preds = %bb4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %bb1

bb7:                                              ; preds = %bb1
  ret void
}
|
|
@ -1,6 +1,17 @@
|
|||
; RUN: opt %loadPolly -tbaa -polly-scops -polly-ignore-aliasing \
|
||||
; RUN: -analyze < %s | FileCheck %s
|
||||
;
|
||||
; Note: The order of the invariant accesses is important because A is the
|
||||
; base pointer of tmp3 and we will generate code in the same order as
|
||||
; the invariant accesses are listed here.
|
||||
;
|
||||
; CHECK: Invariant Accesses: {
|
||||
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK: MemRef_A[42]
|
||||
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK: MemRef_tmp3[32]
|
||||
; CHECK: }
|
||||
;
|
||||
; CHECK: Arrays {
|
||||
; CHECK: i32** MemRef_A[*][8]
|
||||
; CHECK: i32* MemRef_tmp3[*][8] [BasePtrOrigin: MemRef_A]
|
||||
|
@ -31,11 +42,11 @@ bb1: ; preds = %bb7, %bb
|
|||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = getelementptr inbounds i32**, i32*** %A, i64 42
|
||||
%tmp3 = load i32**, i32*** %tmp, align 8, !tbaa !1
|
||||
%tmp3 = load i32**, i32*** %tmp, align 8
|
||||
%tmp4 = getelementptr inbounds i32*, i32** %tmp3, i64 32
|
||||
%tmp5 = load i32*, i32** %tmp4, align 8, !tbaa !1
|
||||
%tmp5 = load i32*, i32** %tmp4, align 8
|
||||
%tmp6 = getelementptr inbounds i32, i32* %tmp5, i64 %indvars.iv
|
||||
store i32 0, i32* %tmp6, align 4, !tbaa !5
|
||||
store i32 0, i32* %tmp6, align 4
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb2
|
||||
|
@ -45,11 +56,3 @@ bb7: ; preds = %bb2
|
|||
bb8: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git 9e282ff441e7a367dc711e41fd19d27ffc0f78d6)"}
|
||||
!1 = !{!2, !2, i64 0}
|
||||
!2 = !{!"any pointer", !3, i64 0}
|
||||
!3 = !{!"omnipotent char", !4, i64 0}
|
||||
!4 = !{!"Simple C/C++ TBAA"}
|
||||
!5 = !{!6, !6, i64 0}
|
||||
!6 = !{!"int", !3, i64 0}
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
; RUN: opt %loadPolly -polly-process-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; The scalar value loaded from *B is defined in one statement (bb2) and used
; in another (bb2b). After hoisting, the load must not leave behind a scalar
; write access for the value it produced — hence the CHECK-NOTs below.
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] };
; CHECK-NOT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NOT: { Stmt_bb2[i0] -> MemRef_tmp[] };
;
;    void f(int *restrict A, int *restrict B) {
;      for (int i = 0; i < 1024; i++) {
;        auto tmp = *B;
;        // Split BB
;        A[i] = tmp;
;      }
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32* noalias %A, i32* noalias %B) {
bb:
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
  %exitcond = icmp ne i64 %indvars.iv, 1024
  br i1 %exitcond, label %bb2, label %bb5

bb2:                                              ; preds = %bb1
  %tmp = load i32, i32* %B, align 4
  br label %bb2b

bb2b:                                             ; preds = %bb2
  %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %tmp, i32* %tmp3, align 4
  br label %bb4

bb4:                                              ; preds = %bb2b
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %bb1

bb5:                                              ; preds = %bb1
  ret void
}
|
|
@ -0,0 +1,85 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; Each hoisted load gets an execution context describing under which
; parameter values it is actually executed: *LB and *UB always run, while
; *V (the j > 5 branch) and *U (the else branch) only run for the parameter
; combinations shown below.
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : (tmp5 >= 1 + tmp and tmp5 >= 6) or tmp >= 6 }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : tmp <= 5 }
; CHECK-NEXT: }
;
;    void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB,
;           int *restrict LB) {
;      for (int i = 0; i < 100; i++) {
;        int j = /* invariant load */ *LB;
;        do {
;          if (j > 5)
;            A[i] += /* invariant load */ *V;
;          else
;            A[i] += /* invariant load */ *U;
;        } while (j++ < /* invariant load */ *UB);
;      }
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @f(i32* noalias %A, i32* noalias %V, i32* noalias %U, i32* noalias %UB, i32* noalias %LB) {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
  %exitcond = icmp ne i64 %indvars.iv, 100
  br i1 %exitcond, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Invariant load of the do-loop lower bound (*LB).
  %tmp = load i32, i32* %LB, align 4
  br label %do.body

do.body:                                          ; preds = %do.cond, %for.body
  %j.0 = phi i32 [ %tmp, %for.body ], [ %inc, %do.cond ]
  %cmp1 = icmp sgt i32 %j.0, 5
  br i1 %cmp1, label %if.then, label %if.else

if.then:                                          ; preds = %do.body
  ; Invariant load of *V, executed only when j > 5.
  %tmp1 = load i32, i32* %V, align 4
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp2 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %tmp2, %tmp1
  store i32 %add, i32* %arrayidx, align 4
  br label %if.end

if.else:                                          ; preds = %do.body
  ; Invariant load of *U, executed only when j <= 5.
  %tmp3 = load i32, i32* %U, align 4
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp4 = load i32, i32* %arrayidx3, align 4
  %add4 = add nsw i32 %tmp4, %tmp3
  store i32 %add4, i32* %arrayidx3, align 4
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  br label %do.cond

do.cond:                                          ; preds = %if.end
  %inc = add nsw i32 %j.0, 1
  ; Invariant load of the do-loop upper bound (*UB).
  %tmp5 = load i32, i32* %UB, align 4
  %cmp5 = icmp slt i32 %j.0, %tmp5
  br i1 %cmp5, label %do.body, label %do.end

do.end:                                           ; preds = %do.cond
  br label %for.inc

for.inc:                                          ; preds = %do.end
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}
|
|
@ -0,0 +1,63 @@
|
|||
; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s
;
; Negative test. If we assume UB[*V] to be invariant we get a cyclic
; dependence in the invariant loads that needs to be resolved by
; ignoring the actual accessed address and focusing on the fact
; that the access happened. However, at the moment we assume UB[*V]
; not to be loop invariant, thus reject this region.
;
; CHECK-NOT: Statements
;
;
;    void f(int *restrict V, int *restrict UB, int *restrict A) {
;      for (int i = 0; i < 100; i++) {
;        int j = 0;
;        int x = 0;
;        do {
;          x = /* invariant load dependent on UB[*V] */ *V;
;          A[j + i]++;
;        } while (j++ < /* invariant load dependent on *V */ UB[x]);
;      }
;    }
;
target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"

define void @f(i32* noalias %V, i32* noalias %UB, i32* noalias %A) {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %indvars.iv2 = phi i32 [ %indvars.iv.next3, %for.inc ], [ 0, %entry ]
  %exitcond = icmp ne i32 %indvars.iv2, 100
  br i1 %exitcond, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  br label %do.body

do.body:                                          ; preds = %do.cond, %for.body
  %indvars.iv = phi i32 [ %indvars.iv.next, %do.cond ], [ 0, %for.body ]
  ; Load of *V; its value indexes the UB access below, creating the cycle.
  %tmp = load i32, i32* %V, align 4
  %tmp4 = add nuw nsw i32 %indvars.iv, %indvars.iv2
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %tmp4
  %tmp5 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %tmp5, 1
  store i32 %inc, i32* %arrayidx, align 4
  br label %do.cond

do.cond:                                          ; preds = %do.body
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  ; UB[*V]: address depends on the load above, so it is not hoistable today.
  %arrayidx3 = getelementptr inbounds i32, i32* %UB, i32 %tmp
  %tmp6 = load i32, i32* %arrayidx3, align 4
  %cmp4 = icmp slt i32 %indvars.iv, %tmp6
  br i1 %cmp4, label %do.body, label %do.end

do.end:                                           ; preds = %do.cond
  br label %for.inc

for.inc:                                          ; preds = %do.end
  %indvars.iv.next3 = add nuw nsw i32 %indvars.iv2, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}
|
|
@ -0,0 +1,108 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; All three loop bounds are loads from the global bounds[] array. Each is
; hoisted with an execution context restricted to the iterations in which
; the surrounding loops actually execute.
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[2]
; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[1]
; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[0]
; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 }
; CHECK-NEXT: }
;
; CHECK: p0: %tmp
; CHECK: p1: %tmp8
; CHECK: p2: %tmp10
; CHECK: Statements {
; CHECK: Stmt_for_body_6
; CHECK: Domain :=
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 };
; CHECK: Schedule :=
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
; CHECK: }
;
;    int bounds[3];
;    double data[1024][1024][1024];
;
;    void foo() {
;      int i, j, k;
;      for (k = 0; k < bounds[2]; k++)
;        for (j = 0; j < bounds[1]; j++)
;          for (i = 0; i < bounds[0]; i++)
;            data[k][j][i] += i + j + k;
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@bounds = common global [3 x i32] zeroinitializer, align 4
@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16

define void @foo() {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.inc.16, %entry
  %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
  ; Invariant load: outermost bound bounds[2].
  %tmp = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 2), align 4
  %tmp7 = sext i32 %tmp to i64
  %cmp = icmp slt i64 %indvars.iv5, %tmp7
  br i1 %cmp, label %for.body, label %for.end.18

for.body:                                         ; preds = %for.cond
  br label %for.cond.1

for.cond.1:                                       ; preds = %for.inc.13, %for.body
  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
  ; Invariant load: middle bound bounds[1].
  %tmp8 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 1), align 4
  %tmp9 = sext i32 %tmp8 to i64
  %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
  br i1 %cmp2, label %for.body.3, label %for.end.15

for.body.3:                                       ; preds = %for.cond.1
  br label %for.cond.4

for.cond.4:                                       ; preds = %for.inc, %for.body.3
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
  ; Invariant load: innermost bound bounds[0].
  %tmp10 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 0), align 4
  %tmp11 = sext i32 %tmp10 to i64
  %cmp5 = icmp slt i64 %indvars.iv, %tmp11
  br i1 %cmp5, label %for.body.6, label %for.end

for.body.6:                                       ; preds = %for.cond.4
  %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3
  %tmp13 = add nsw i64 %tmp12, %indvars.iv5
  %tmp14 = trunc i64 %tmp13 to i32
  %conv = sitofp i32 %tmp14 to double
  %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv
  %tmp15 = load double, double* %arrayidx11, align 8
  %add12 = fadd double %tmp15, %conv
  store double %add12, double* %arrayidx11, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body.6
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  br label %for.cond.4

for.end:                                          ; preds = %for.cond.4
  br label %for.inc.13

for.inc.13:                                       ; preds = %for.end
  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
  br label %for.cond.1

for.end.15:                                       ; preds = %for.cond.1
  br label %for.inc.16

for.inc.16:                                       ; preds = %for.end.15
  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
  br label %for.cond

for.end.18:                                       ; preds = %for.cond
  ret void
}
|
|
@ -0,0 +1,68 @@
|
|||
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; Two-dimensional variant: both loop bounds are loads from the global
; bounds[] array. The inner bound's execution context requires the outer
; loop to execute at least once (tmp >= 0, as the loops use <=).
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[0]
; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : }
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_bounds[1]
; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : tmp >= 0 }
; CHECK: }

;    double A[1000][1000];
;    long bounds[2];
;
;    void foo() {
;
;      for (long i = 0; i <= bounds[0]; i++)
;        for (long j = 0; j <= bounds[1]; j++)
;          A[i][j] += i + j;
;    }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@bounds = common global [2 x i64] zeroinitializer, align 16
@A = common global [1000 x [1000 x double]] zeroinitializer, align 16

define void @foo() {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.inc.6, %entry
  %i.0 = phi i64 [ 0, %entry ], [ %inc7, %for.inc.6 ]
  ; Invariant load: outer bound bounds[0].
  %tmp = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 0), align 16
  %cmp = icmp sgt i64 %i.0, %tmp
  br i1 %cmp, label %for.end.8, label %for.body

for.body:                                         ; preds = %for.cond
  br label %for.cond.1

for.cond.1:                                       ; preds = %for.inc, %for.body
  %j.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ]
  ; Invariant load: inner bound bounds[1].
  %tmp1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 1), align 8
  %cmp2 = icmp sgt i64 %j.0, %tmp1
  br i1 %cmp2, label %for.end, label %for.body.3

for.body.3:                                       ; preds = %for.cond.1
  %add = add nsw i64 %i.0, %j.0
  %conv = sitofp i64 %add to double
  %arrayidx4 = getelementptr inbounds [1000 x [1000 x double]], [1000 x [1000 x double]]* @A, i64 0, i64 %i.0, i64 %j.0
  %tmp2 = load double, double* %arrayidx4, align 8
  %add5 = fadd double %tmp2, %conv
  store double %add5, double* %arrayidx4, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body.3
  %inc = add nuw nsw i64 %j.0, 1
  br label %for.cond.1

for.end:                                          ; preds = %for.cond.1
  br label %for.inc.6

for.inc.6:                                        ; preds = %for.end
  %inc7 = add nuw nsw i64 %i.0, 1
  br label %for.cond

for.end.8:                                        ; preds = %for.cond
  ret void
}
|
Loading…
Reference in New Issue