forked from OSchip/llvm-project
[NFC] Separate Peeling Properties into its own struct
Summary: This patch makes the peeling properties of the loop accessible by other loop transformations. Author: sidbav (Sidharth Baveja) Reviewers: Whitney (Whitney Tsang), Meinersbur (Michael Kruse), skatkov (Serguei Katkov), ashlykov (Arkady Shlykov), bogner (Justin Bogner), hfinkel (Hal Finkel) Reviewed By: Meinersbur (Michael Kruse) Subscribers: fhahn (Florian Hahn), hiraditya (Aditya Kumar), llvm-commits, LLVM Tag: LLVM Differential Revision: https://reviews.llvm.org/D80580
This commit is contained in:
parent
ef14e52be4
commit
fead250b43
|
@ -450,11 +450,6 @@ public:
|
|||
/// transformation will select an unrolling factor based on the current cost
|
||||
/// threshold and other factors.
|
||||
unsigned Count;
|
||||
/// A forced peeling factor (the number of bodied of the original loop
|
||||
/// that should be peeled off before the loop body). When set to 0, the
|
||||
/// unrolling transformation will select a peeling factor based on profile
|
||||
/// information and other factors.
|
||||
unsigned PeelCount;
|
||||
/// Default unroll count for loops with run-time trip count.
|
||||
unsigned DefaultUnrollRuntimeCount;
|
||||
// Set the maximum unrolling factor. The unrolling factor may be selected
|
||||
|
@ -488,19 +483,10 @@ public:
|
|||
bool Force;
|
||||
/// Allow using trip count upper bound to unroll loops.
|
||||
bool UpperBound;
|
||||
/// Allow peeling off loop iterations.
|
||||
bool AllowPeeling;
|
||||
/// Allow peeling off loop iterations for loop nests.
|
||||
bool AllowLoopNestsPeeling;
|
||||
/// Allow unrolling of all the iterations of the runtime loop remainder.
|
||||
bool UnrollRemainder;
|
||||
/// Allow unroll and jam. Used to enable unroll and jam for the target.
|
||||
bool UnrollAndJam;
|
||||
/// Allow peeling basing on profile. Uses to enable peeling off all
|
||||
/// iterations basing on provided profile.
|
||||
/// If the value is true the peeling cost model can decide to peel only
|
||||
/// some iterations and in this case it will set this to false.
|
||||
bool PeelProfiledIterations;
|
||||
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
|
||||
/// value above is used during unroll and jam for the outer loop size.
|
||||
/// This value is used in the same manner to limit the size of the inner
|
||||
|
@ -534,6 +520,28 @@ public:
|
|||
/// intrinsic is supported.
|
||||
bool emitGetActiveLaneMask() const;
|
||||
|
||||
// Parameters that control the loop peeling transformation
|
||||
/// Tunable knobs for the loop peeling transformation, filled in through
/// getPeelingPreferences(). The struct declares no default member
/// initializers, so a default-constructed instance holds indeterminate values
/// until each field is assigned.
struct PeelingPreferences {
|
||||
/// A forced peeling factor (the number of bodies of the original loop
|
||||
/// that should be peeled off before the loop body). When set to 0, the
|
||||
/// unrolling transformation will select a peeling factor based on profile
/// information and other factors.
|
||||
unsigned PeelCount;
|
||||
/// Allow peeling off loop iterations.
|
||||
bool AllowPeeling;
|
||||
/// Allow peeling off loop iterations for loop nests.
|
||||
bool AllowLoopNestsPeeling;
|
||||
/// Allow peeling based on profile. Used to enable peeling off all
|
||||
/// iterations based on the provided profile.
|
||||
/// If the value is true the peeling cost model can decide to peel only
|
||||
/// some iterations and in this case it will set this to false.
|
||||
bool PeelProfiledIterations;
|
||||
};
|
||||
|
||||
/// Get target-customized preferences for the generic loop peeling
|
||||
/// transformation. The caller will initialize \p PP with the current
|
||||
/// target-independent defaults with information from \p L and \p SE.
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
PeelingPreferences &PP) const;
|
||||
/// @}
|
||||
|
||||
/// \name Scalar Target Information
|
||||
|
@ -1282,6 +1290,8 @@ public:
|
|||
virtual bool isLoweredToCall(const Function *F) = 0;
|
||||
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
|
||||
UnrollingPreferences &UP) = 0;
|
||||
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
PeelingPreferences &PP) = 0;
|
||||
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
||||
AssumptionCache &AC,
|
||||
TargetLibraryInfo *LibInfo,
|
||||
|
@ -1560,6 +1570,10 @@ public:
|
|||
UnrollingPreferences &UP) override {
|
||||
return Impl.getUnrollingPreferences(L, SE, UP);
|
||||
}
|
||||
/// Forwards the peeling-preferences query, with all three arguments
/// unchanged, to the wrapped Impl object.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
PeelingPreferences &PP) override {
|
||||
return Impl.getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
||||
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
|
||||
HardwareLoopInfo &HWLoopInfo) override {
|
||||
|
|
|
@ -150,6 +150,9 @@ public:
|
|||
void getUnrollingPreferences(Loop *, ScalarEvolution &,
|
||||
TTI::UnrollingPreferences &) {}
|
||||
|
||||
/// No-op: the body is empty, so the PeelingPreferences argument is left
/// exactly as the caller passed it in (no field is written).
void getPeelingPreferences(Loop *, ScalarEvolution &,
|
||||
TTI::PeelingPreferences &) {}
|
||||
|
||||
bool isLegalAddImmediate(int64_t Imm) { return false; }
|
||||
|
||||
bool isLegalICmpImmediate(int64_t Imm) { return false; }
|
||||
|
|
|
@ -451,6 +451,14 @@ public:
|
|||
UP.BEInsns = 2;
|
||||
}
|
||||
|
||||
/// Writes a fixed value to every field of \p PP: no forced peel count,
/// peeling allowed, loop-nest peeling disallowed, profile-based peeling
/// enabled. \p L and \p SE are not consulted.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
// 0 means "no forced factor"; see the PeelCount field documentation.
PP.PeelCount = 0;
|
||||
PP.AllowPeeling = true;
|
||||
PP.AllowLoopNestsPeeling = false;
|
||||
PP.PeelProfiledIterations = true;
|
||||
}
|
||||
|
||||
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
||||
AssumptionCache &AC,
|
||||
TargetLibraryInfo *LibInfo,
|
||||
|
|
|
@ -94,6 +94,7 @@ bool UnrollRuntimeLoopRemainder(
|
|||
|
||||
void computePeelCount(Loop *L, unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP,
|
||||
TargetTransformInfo::PeelingPreferences &PP,
|
||||
unsigned &TripCount, ScalarEvolution &SE);
|
||||
|
||||
bool canPeel(Loop *L);
|
||||
|
@ -119,6 +120,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|||
unsigned MaxTripCount, bool MaxOrZero,
|
||||
unsigned &TripMultiple, unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP,
|
||||
TargetTransformInfo::PeelingPreferences &PP,
|
||||
|
||||
bool &UseUpperBound);
|
||||
|
||||
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
|
||||
|
@ -133,9 +136,13 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
|||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
|
||||
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
|
||||
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
|
||||
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
|
||||
Optional<bool> UserAllowProfileBasedPeeling,
|
||||
Optional<unsigned> UserFullUnrollMaxCount);
|
||||
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
|
||||
|
||||
TargetTransformInfo::PeelingPreferences
|
||||
gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
const TargetTransformInfo &TTI,
|
||||
Optional<bool> UserAllowPeeling,
|
||||
Optional<bool> UserAllowProfileBasedPeeling);
|
||||
|
||||
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
|
||||
bool &NotDuplicatable, bool &Convergent,
|
||||
|
|
|
@ -327,6 +327,11 @@ void TargetTransformInfo::getUnrollingPreferences(
|
|||
return TTIImpl->getUnrollingPreferences(L, SE, UP);
|
||||
}
|
||||
|
||||
/// Public entry point for querying peeling preferences: delegates to the
/// implementation object held in TTIImpl, passing \p L, \p SE and \p PP
/// through unchanged.
void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
PeelingPreferences &PP) const {
|
||||
return TTIImpl->getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
||||
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
|
||||
return TTIImpl->isLegalAddImmediate(Imm);
|
||||
}
|
||||
|
|
|
@ -859,6 +859,11 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
getFalkorUnrollingPreferences(L, SE, UP);
|
||||
}
|
||||
|
||||
/// AArch64 peeling-preferences hook. Adds no target-specific adjustments: it
/// only calls the BaseT implementation.
void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
||||
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
|
||||
Type *ExpectedType) {
|
||||
switch (Inst->getIntrinsicID()) {
|
||||
|
|
|
@ -153,6 +153,9 @@ public:
|
|||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
|
||||
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
|
||||
Type *ExpectedType);
|
||||
|
||||
|
|
|
@ -236,6 +236,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
}
|
||||
}
|
||||
|
||||
/// AMDGPU peeling-preferences hook. Adds no target-specific adjustments: it
/// only calls the BaseT implementation.
void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
|
||||
// The concept of vector registers doesn't really exist. Some packed vector
|
||||
// operations operate on the normal 32-bit registers.
|
||||
|
@ -990,6 +994,11 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
CommonTTI.getUnrollingPreferences(L, SE, UP);
|
||||
}
|
||||
|
||||
/// GCN peeling-preferences hook: hands the query to the CommonTTI member
/// rather than computing anything itself.
void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
CommonTTI.getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
||||
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
|
||||
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
|
||||
}
|
||||
|
@ -1096,3 +1105,8 @@ void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
TTI::UnrollingPreferences &UP) {
|
||||
CommonTTI.getUnrollingPreferences(L, SE, UP);
|
||||
}
|
||||
|
||||
/// R600 peeling-preferences hook: hands the query to the CommonTTI member
/// rather than computing anything itself.
void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
CommonTTI.getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
|
|
@ -61,6 +61,9 @@ public:
|
|||
|
||||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
};
|
||||
|
||||
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
|
@ -141,6 +144,9 @@ public:
|
|||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
|
||||
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
|
||||
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
|
||||
return TTI::PSK_FastHardware;
|
||||
|
@ -258,6 +264,8 @@ public:
|
|||
|
||||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
unsigned getHardwareNumberOfRegisters(bool Vec) const;
|
||||
unsigned getNumberOfRegisters(bool Vec) const;
|
||||
unsigned getRegisterBitWidth(bool Vector) const;
|
||||
|
|
|
@ -1582,6 +1582,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
UP.Force = true;
|
||||
}
|
||||
|
||||
/// ARM peeling-preferences hook. Adds no target-specific adjustments: it only
/// calls the BaseT implementation.
void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
||||
bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
|
||||
TTI::ReductionFlags Flags) const {
|
||||
return ST->hasMVEIntegerOps();
|
||||
|
|
|
@ -251,6 +251,8 @@ public:
|
|||
|
||||
bool emitGetActiveLaneMask() const;
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
bool shouldBuildLookupTablesForConstant(Constant *C) const {
|
||||
// In the ROPI and RWPI relocation models we can't have pointers to global
|
||||
// variables or functions in constant data, so don't convert switches to
|
||||
|
|
|
@ -78,12 +78,17 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
|
|||
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP) {
|
||||
UP.Runtime = UP.Partial = true;
|
||||
}
|
||||
|
||||
/// Hexagon peeling-preferences hook. Starts from the BaseT values and then
/// forces a peel count of 2 for loops that pass the check below.
void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
// Only try to peel innermost loops with small runtime trip counts.
// The check requires: a non-null loop with no sub-loops that canPeel()
// accepts, a small constant trip count of 0 (presumably "not a known
// constant" -- confirm against ScalarEvolution), and a small constant max
// trip count in the range [1, 5].
|
||||
if (L && L->empty() && canPeel(L) &&
|
||||
SE.getSmallConstantTripCount(L) == 0 &&
|
||||
SE.getSmallConstantMaxTripCount(L) > 0 &&
|
||||
SE.getSmallConstantMaxTripCount(L) <= 5) {
|
||||
// NOTE(review): no `UP` is declared in this function's parameter list; the
// next statement looks like the pre-change line that the diff rendering kept
// next to its replacement -- confirm against the actual file.
UP.PeelCount = 2;
|
||||
PP.PeelCount = 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -64,6 +64,9 @@ public:
|
|||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
|
||||
/// Bias LSR towards creating post-increment opportunities.
|
||||
bool shouldFavorPostInc() const;
|
||||
|
||||
|
|
|
@ -155,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
UP.Partial = UP.Runtime = true;
|
||||
UP.PartialThreshold = UP.Threshold / 4;
|
||||
}
|
||||
|
||||
/// NVPTX peeling-preferences hook. Adds no target-specific adjustments: it
/// only calls the BaseT implementation.
void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
|
|
@ -95,6 +95,10 @@ public:
|
|||
|
||||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
|
||||
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
|
||||
// Volatile loads/stores are only supported for shared and global address
|
||||
// spaces, or for generic AS that maps to them.
|
||||
|
|
|
@ -568,6 +568,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
BaseT::getUnrollingPreferences(L, SE, UP);
|
||||
}
|
||||
|
||||
/// PowerPC peeling-preferences hook. Adds no target-specific adjustments: it
/// only calls the BaseT implementation.
void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
// This function returns true to allow using coldcc calling convention.
|
||||
// Returning true results in coldcc being used for functions which are cold at
|
||||
// all call sites when the callers of the functions are not calling any other
|
||||
|
|
|
@ -66,6 +66,8 @@ public:
|
|||
TargetLibraryInfo *LibInfo);
|
||||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
|
||||
TargetTransformInfo::LSRCost &C2);
|
||||
|
||||
|
|
|
@ -294,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
|||
UP.Force = true;
|
||||
}
|
||||
|
||||
/// SystemZ peeling-preferences hook. Adds no target-specific adjustments: it
/// only calls the BaseT implementation.
void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP) {
|
||||
BaseT::getPeelingPreferences(L, SE, PP);
|
||||
}
|
||||
|
||||
bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
|
||||
TargetTransformInfo::LSRCost &C2) {
|
||||
|
|
|
@ -50,6 +50,9 @@ public:
|
|||
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::UnrollingPreferences &UP);
|
||||
|
||||
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
TTI::PeelingPreferences &PP);
|
||||
|
||||
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
|
||||
TargetTransformInfo::LSRCost &C2);
|
||||
/// @}
|
||||
|
|
|
@ -158,7 +158,8 @@ static bool computeUnrollAndJamCount(
|
|||
const SmallPtrSetImpl<const Value *> &EphValues,
|
||||
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
|
||||
unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
|
||||
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
|
||||
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
|
||||
TargetTransformInfo::PeelingPreferences &PP) {
|
||||
// First up use computeUnrollCount from the loop unroller to get a count
|
||||
// for unrolling the outer loop, plus any loops requiring explicit
|
||||
// unrolling we leave to the unroller. This uses UP.Threshold /
|
||||
|
@ -168,7 +169,8 @@ static bool computeUnrollAndJamCount(
|
|||
bool UseUpperBound = false;
|
||||
bool ExplicitUnroll = computeUnrollCount(
|
||||
L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
|
||||
/*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
|
||||
/*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
|
||||
UseUpperBound);
|
||||
if (ExplicitUnroll || UseUpperBound) {
|
||||
// If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
|
||||
// for the unroller instead.
|
||||
|
@ -282,7 +284,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
|||
OptimizationRemarkEmitter &ORE, int OptLevel) {
|
||||
TargetTransformInfo::UnrollingPreferences UP =
|
||||
gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
|
||||
None, None, None, None, None, None, None);
|
||||
None, None, None, None, None);
|
||||
TargetTransformInfo::PeelingPreferences PP =
|
||||
gatherPeelingPreferences(L, SE, TTI, None, None);
|
||||
if (AllowUnrollAndJam.getNumOccurrences() > 0)
|
||||
UP.UnrollAndJam = AllowUnrollAndJam;
|
||||
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
|
||||
|
@ -367,7 +371,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
|||
// Decide if, and by how much, to unroll
|
||||
bool IsCountSetExplicitly = computeUnrollAndJamCount(
|
||||
L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
|
||||
OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP);
|
||||
OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
|
||||
if (UP.Count <= 1)
|
||||
return LoopUnrollResult::Unmodified;
|
||||
// Unroll factor (Count) must be less or equal to TripCount.
|
||||
|
|
|
@ -193,9 +193,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
|
||||
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
|
||||
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
|
||||
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
|
||||
Optional<bool> UserAllowProfileBasedPeeling,
|
||||
Optional<unsigned> UserFullUnrollMaxCount) {
|
||||
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
|
||||
TargetTransformInfo::UnrollingPreferences UP;
|
||||
|
||||
// Set up the defaults
|
||||
|
@ -206,7 +204,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
UP.PartialThreshold = 150;
|
||||
UP.PartialOptSizeThreshold = 0;
|
||||
UP.Count = 0;
|
||||
UP.PeelCount = 0;
|
||||
UP.DefaultUnrollRuntimeCount = 8;
|
||||
UP.MaxCount = std::numeric_limits<unsigned>::max();
|
||||
UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
|
||||
|
@ -218,10 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
UP.AllowExpensiveTripCount = false;
|
||||
UP.Force = false;
|
||||
UP.UpperBound = false;
|
||||
UP.AllowPeeling = true;
|
||||
UP.AllowLoopNestsPeeling = false;
|
||||
UP.UnrollAndJam = false;
|
||||
UP.PeelProfiledIterations = true;
|
||||
UP.UnrollAndJamInnerLoopThreshold = 60;
|
||||
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
|
||||
|
||||
|
@ -249,8 +243,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
UP.MaxCount = UnrollMaxCount;
|
||||
if (UnrollFullMaxCount.getNumOccurrences() > 0)
|
||||
UP.FullUnrollMaxCount = UnrollFullMaxCount;
|
||||
if (UnrollPeelCount.getNumOccurrences() > 0)
|
||||
UP.PeelCount = UnrollPeelCount;
|
||||
if (UnrollAllowPartial.getNumOccurrences() > 0)
|
||||
UP.Partial = UnrollAllowPartial;
|
||||
if (UnrollAllowRemainder.getNumOccurrences() > 0)
|
||||
|
@ -259,10 +251,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
UP.Runtime = UnrollRuntime;
|
||||
if (UnrollMaxUpperBound == 0)
|
||||
UP.UpperBound = false;
|
||||
if (UnrollAllowPeeling.getNumOccurrences() > 0)
|
||||
UP.AllowPeeling = UnrollAllowPeeling;
|
||||
if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
|
||||
UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
|
||||
if (UnrollUnrollRemainder.getNumOccurrences() > 0)
|
||||
UP.UnrollRemainder = UnrollUnrollRemainder;
|
||||
if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
|
||||
|
@ -281,16 +269,39 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
UP.Runtime = *UserRuntime;
|
||||
if (UserUpperBound.hasValue())
|
||||
UP.UpperBound = *UserUpperBound;
|
||||
if (UserAllowPeeling.hasValue())
|
||||
UP.AllowPeeling = *UserAllowPeeling;
|
||||
if (UserAllowProfileBasedPeeling.hasValue())
|
||||
UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
|
||||
if (UserFullUnrollMaxCount.hasValue())
|
||||
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
|
||||
|
||||
return UP;
|
||||
}
|
||||
|
||||
/// Gather the peeling preferences to use for loop \p L.
///
/// Values are applied in increasing order of priority, each step overriding
/// the previous one:
///   1. target-independent defaults,
///   2. target-specific values from TTI.getPeelingPreferences(),
///   3. command-line options that were explicitly given,
///   4. values supplied by the caller through the Optional arguments.
///
/// \param L   Loop the preferences are gathered for; forwarded to the target.
/// \param SE  ScalarEvolution analysis; forwarded to the target.
/// \param TTI Target hook provider.
/// \param UserAllowPeeling When set, overrides PP.AllowPeeling.
/// \param UserAllowProfileBasedPeeling When set, overrides
///        PP.PeelProfiledIterations.
/// \returns the fully populated PeelingPreferences.
TargetTransformInfo::PeelingPreferences
llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
                               const TargetTransformInfo &TTI,
                               Optional<bool> UserAllowPeeling,
                               Optional<bool> UserAllowProfileBasedPeeling) {
  TargetTransformInfo::PeelingPreferences PP;

  // Set up the target-independent defaults. PeelingPreferences has no default
  // member initializers and a target hook may leave fields untouched (the
  // fallback TTI implementation has an empty body), so every field must be
  // assigned here before anything reads it.
  PP.PeelCount = 0;
  PP.AllowPeeling = true;
  PP.AllowLoopNestsPeeling = false;
  PP.PeelProfiledIterations = true;

  // Get target-specific values.
  TTI.getPeelingPreferences(L, SE, PP);

  // User-specified values using cl::opt. Only options that actually appeared
  // on the command line override what the target chose.
  if (UnrollPeelCount.getNumOccurrences() > 0)
    PP.PeelCount = UnrollPeelCount;
  if (UnrollAllowPeeling.getNumOccurrences() > 0)
    PP.AllowPeeling = UnrollAllowPeeling;
  if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
    PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;

  // User-specified values provided by argument. Applied last, so they win
  // over both the target and the command line.
  if (UserAllowPeeling.hasValue())
    PP.AllowPeeling = *UserAllowPeeling;
  if (UserAllowProfileBasedPeeling.hasValue())
    PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;

  return PP;
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// A struct to densely store the state of an instruction after unrolling at
|
||||
|
@ -761,7 +772,8 @@ bool llvm::computeUnrollCount(
|
|||
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
|
||||
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
|
||||
bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
|
||||
TargetTransformInfo::UnrollingPreferences &UP,
|
||||
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
|
||||
|
||||
// Check for explicit Count.
|
||||
// 1st priority is unroll count set by "unroll-count" option.
|
||||
|
@ -863,8 +875,8 @@ bool llvm::computeUnrollCount(
|
|||
}
|
||||
|
||||
// 4th priority is loop peeling.
|
||||
computePeelCount(L, LoopSize, UP, TripCount, SE);
|
||||
if (UP.PeelCount) {
|
||||
computePeelCount(L, LoopSize, UP, PP, TripCount, SE);
|
||||
if (PP.PeelCount) {
|
||||
UP.Runtime = false;
|
||||
UP.Count = 1;
|
||||
return ExplicitUnroll;
|
||||
|
@ -1067,8 +1079,9 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
|
||||
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
|
||||
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
|
||||
ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
|
||||
ProvidedFullUnrollMaxCount);
|
||||
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
|
||||
L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
|
||||
|
||||
// Exit early if unrolling is disabled. For OptForSize, we pick the loop size
|
||||
// as threshold later on.
|
||||
|
@ -1142,7 +1155,7 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
bool UseUpperBound = false;
|
||||
bool IsCountSetExplicitly = computeUnrollCount(
|
||||
L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
|
||||
TripMultiple, LoopSize, UP, UseUpperBound);
|
||||
TripMultiple, LoopSize, UP, PP, UseUpperBound);
|
||||
if (!UP.Count)
|
||||
return LoopUnrollResult::Unmodified;
|
||||
// Unroll factor (Count) must be less or equal to TripCount.
|
||||
|
@ -1157,7 +1170,7 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
LoopUnrollResult UnrollResult = UnrollLoop(
|
||||
L,
|
||||
{UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
|
||||
UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
|
||||
UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder,
|
||||
ForgetAllSCEV},
|
||||
LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
|
||||
if (UnrollResult == LoopUnrollResult::Unmodified)
|
||||
|
@ -1189,7 +1202,7 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
// If the loop was peeled, we already "used up" the profile information
|
||||
// we had, so we don't want to unroll or peel again.
|
||||
if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
|
||||
(IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
|
||||
(IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
|
||||
L->setLoopAlreadyUnrolled();
|
||||
|
||||
return UnrollResult;
|
||||
|
|
|
@ -279,19 +279,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
|
|||
// Return the number of iterations we want to peel off.
|
||||
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
||||
TargetTransformInfo::UnrollingPreferences &UP,
|
||||
TargetTransformInfo::PeelingPreferences &PP,
|
||||
unsigned &TripCount, ScalarEvolution &SE) {
|
||||
assert(LoopSize > 0 && "Zero loop size is not allowed!");
|
||||
// Save the UP.PeelCount value set by the target in
|
||||
// TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
|
||||
unsigned TargetPeelCount = UP.PeelCount;
|
||||
UP.PeelCount = 0;
|
||||
// Save the PP.PeelCount value set by the target in
|
||||
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
|
||||
unsigned TargetPeelCount = PP.PeelCount;
|
||||
PP.PeelCount = 0;
|
||||
if (!canPeel(L))
|
||||
return;
|
||||
|
||||
// Only try to peel innermost loops by default.
|
||||
// The constraint can be relaxed by the target in TTI.getPeelingPreferences
|
||||
// or by the flag -unroll-allow-loop-nests-peeling.
|
||||
if (!UP.AllowLoopNestsPeeling && !L->empty())
|
||||
if (!PP.AllowLoopNestsPeeling && !L->empty())
|
||||
return;
|
||||
|
||||
// If the user provided a peel count, use that.
|
||||
|
@ -299,13 +300,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
|||
if (UserPeelCount) {
|
||||
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
|
||||
<< " iterations.\n");
|
||||
UP.PeelCount = UnrollForcePeelCount;
|
||||
UP.PeelProfiledIterations = true;
|
||||
PP.PeelCount = UnrollForcePeelCount;
|
||||
PP.PeelProfiledIterations = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip peeling if it's disabled.
|
||||
if (!UP.AllowPeeling)
|
||||
if (!PP.AllowPeeling)
|
||||
return;
|
||||
|
||||
unsigned AlreadyPeeled = 0;
|
||||
|
@ -354,8 +355,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
|||
LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
|
||||
<< " iteration(s) to turn"
|
||||
<< " some Phis into invariants.\n");
|
||||
UP.PeelCount = DesiredPeelCount;
|
||||
UP.PeelProfiledIterations = false;
|
||||
PP.PeelCount = DesiredPeelCount;
|
||||
PP.PeelProfiledIterations = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -367,7 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
|||
return;
|
||||
|
||||
// Do not apply profile base peeling if it is disabled.
|
||||
if (!UP.PeelProfiledIterations)
|
||||
if (!PP.PeelProfiledIterations)
|
||||
return;
|
||||
// If we don't know the trip count, but have reason to believe the average
|
||||
// trip count is low, peeling should be beneficial, since we will usually
|
||||
|
@ -387,7 +388,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
|
|||
(LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
|
||||
LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
|
||||
<< " iterations.\n");
|
||||
UP.PeelCount = *PeelCount;
|
||||
PP.PeelCount = *PeelCount;
|
||||
return;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
|
||||
|
|
Loading…
Reference in New Issue