[NFC] factor out unrolling decision logic

Decoupling the unrolling logic into three different functions. The shouldPragmaUnroll() covers the 1st and 2nd priorities of the previous code, the shouldFullUnroll() covers the 3rd, and the shouldPartialUnroll() covers the 5th. The output of each function, Optional<unsigned>, could be a value for UP.Count, which means unrolling factor has been set, or None, which means decision hasn't been made yet and should try the next priority.

Reviewed By: mtrofin, jdoerfert

Differential Revision: https://reviews.llvm.org/D106001
This commit is contained in:
Ali Sedaghati 2021-08-18 11:09:04 -07:00 committed by Mircea Trofin
parent 811dbecaf5
commit 4d559837e8
1 changed files with 200 additions and 112 deletions

View File

@ -65,6 +65,7 @@
#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
@ -319,6 +320,16 @@ struct EstimatedUnrollCost {
unsigned RolledDynamicCost;
};
struct PragmaInfo {
PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
: UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
PragmaEnableUnroll(PEU) {}
const bool UserUnrollCount;
const bool PragmaFullUnroll;
const unsigned PragmaCount;
const bool PragmaEnableUnroll;
};
} // end anonymous namespace
/// Figure out if the loop is worth full unrolling.
@ -747,13 +758,132 @@ public:
// Returns loop size estimation for unrolled loop, given the unrolling
// configuration specified by UP.
uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
uint64_t
getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
const unsigned CountOverwrite = 0) const {
assert(LoopSize >= UP.BEInsns &&
"LoopSize should not be less than BEInsns!");
return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
if (CountOverwrite)
return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
UP.BEInsns;
else
return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
UP.BEInsns;
}
};
static Optional<unsigned>
shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
const unsigned TripMultiple, const unsigned TripCount,
const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
// Using unroll pragma
// 1st priority is unroll count set by "unroll-count" option.
if (PInfo.UserUnrollCount) {
if (UP.AllowRemainder &&
UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
return (unsigned)UnrollCount;
}
// 2nd priority is unroll count set by pragma.
if (PInfo.PragmaCount > 0) {
if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
return PInfo.PragmaCount;
}
if (PInfo.PragmaFullUnroll && TripCount != 0) {
if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
return TripCount;
}
// if didn't return until here, should continue to other priorties
return None;
}
static Optional<unsigned> shouldFullUnroll(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
return FullUnrollTripCount;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
L, FullUnrollTripCount, DT, SE, EphValues, TTI,
UP.Threshold * UP.MaxPercentThresholdBoost / 100,
UP.MaxIterationsCountToAnalyze)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
return FullUnrollTripCount;
}
}
}
}
return None;
}
static Optional<unsigned>
shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
unsigned count = UP.Count;
if (TripCount) {
if (!UP.Partial) {
LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
count = 0;
return count;
}
if (count == 0)
count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
(LoopSize - UP.BEInsns);
if (count > UP.MaxCount)
count = UP.MaxCount;
while (count != 0 && TripCount % count != 0)
count--;
if (UP.AllowRemainder && count <= 1) {
// If there is no Count that is modulo of TripCount, set Count to
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
count = UP.DefaultUnrollRuntimeCount;
while (count != 0 &&
UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
count >>= 1;
}
if (count < 2) {
count = 0;
}
} else {
count = TripCount;
}
if (count > UP.MaxCount)
count = UP.MaxCount;
LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
return count;
}
// if didn't return until here, should continue to other priorties
return None;
}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
@ -771,7 +901,18 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
Optional<unsigned> UnrollFactor;
const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
const unsigned PragmaCount = unrollCountPragmaValue(L);
const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
PragmaEnableUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
@ -783,40 +924,21 @@ bool llvm::computeUnrollCount(
UP.Runtime = false;
return true;
}
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
if (UserUnrollCount) {
UP.Count = UnrollCount;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
return true;
}
// 2nd priority is unroll count set by pragma.
unsigned PragmaCount = unrollCountPragmaValue(L);
if (PragmaCount > 0) {
UP.Count = PragmaCount;
UP.Runtime = true;
UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
if (UnrollFactor) {
UP.Count = *UnrollFactor;
if (UserUnrollCount || (PragmaCount > 0)) {
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
return true;
}
bool PragmaFullUnroll = hasUnrollFullPragma(L);
if (PragmaFullUnroll && TripCount != 0) {
UP.Count = TripCount;
if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
return false;
}
bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
UP.Runtime |= (PragmaCount > 0);
return ExplicitUnroll;
} else {
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
@ -825,6 +947,7 @@ bool llvm::computeUnrollCount(
UP.PartialThreshold =
std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
}
}
// 3rd priority is full unroll count.
// Full unroll makes sense only when TripCount or its upper bound could be
@ -853,28 +976,20 @@ bool llvm::computeUnrollCount(
unsigned FullUnrollTripCount =
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
UnrollFactor =
shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
// if shouldFullUnroll can do the unrolling, some side parameteres should be
// set
if (UnrollFactor) {
UP.Count = *UnrollFactor;
UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
L, FullUnrollTripCount, DT, SE, EphValues, TTI,
UP.Threshold * UP.MaxPercentThresholdBoost / 100,
UP.MaxIterationsCountToAnalyze)) {
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
return ExplicitUnroll;
}
}
}
UP.Count = FullUnrollTripCount;
}
// 4th priority is loop peeling.
@ -885,55 +1000,18 @@ bool llvm::computeUnrollCount(
return ExplicitUnroll;
}
// Before starting partial unrolling, set up.partial to true,
// if user explicitly asked for unrolling
if (TripCount)
UP.Partial |= ExplicitUnroll;
// 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
if (TripCount) {
UP.Partial |= ExplicitUnroll;
if (!UP.Partial) {
LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
UP.Count = 0;
return false;
}
if (UP.Count == 0)
UP.Count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
UP.Count =
(std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
(LoopSize - UP.BEInsns);
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
while (UP.Count != 0 && TripCount % UP.Count != 0)
UP.Count--;
if (UP.AllowRemainder && UP.Count <= 1) {
// If there is no Count that is modulo of TripCount, set Count to
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
UP.Count = UP.DefaultUnrollRuntimeCount;
while (UP.Count != 0 &&
UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
UP.Count >>= 1;
}
if (UP.Count < 2) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"UnrollAsDirectedTooLarge",
L->getStartLoc(), L->getHeader())
<< "Unable to unroll loop as directed by unroll(enable) "
"pragma "
"because unrolled size is too large.";
});
UP.Count = 0;
}
} else {
UP.Count = TripCount;
}
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
if (UnrollFactor) {
UP.Count = *UnrollFactor;
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
UP.Count != TripCount)
ORE->emit([&]() {
@ -944,8 +1022,20 @@ bool llvm::computeUnrollCount(
"because "
"unrolled size is too large.";
});
LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
<< "\n");
if (UP.PartialThreshold != NoThreshold) {
if (UP.Count == 0) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"UnrollAsDirectedTooLarge",
L->getStartLoc(), L->getHeader())
<< "Unable to unroll loop as directed by unroll(enable) "
"pragma "
"because unrolled size is too large.";
});
}
}
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@ -982,8 +1072,6 @@ bool llvm::computeUnrollCount(
UP.AllowExpensiveTripCount = true;
}
}
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(
@ -1018,7 +1106,7 @@ bool llvm::computeUnrollCount(
using namespace ore;
if (PragmaCount > 0 && !UP.AllowRemainder)
if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"DifferentUnrollCountFromDirected",