Minor unroll pass refacoring.

Summary:
Unrolled Loop Size calculations moved to a function.
Constant representing number of optimized instructions
 when "back edge" becomes "fall through" replaced with
 variable.
Some comments added.

Reviewers: mzolotukhin

Differential Revision: http://reviews.llvm.org/D21719

From: Evgeny Stupachenko <evstupac@gmail.com>
llvm-svn: 286389
This commit is contained in:
Evgeny Stupachenko 2016-11-09 19:56:39 +00:00
parent 26f28a2836
commit c2698cd903
3 changed files with 38 additions and 35 deletions

View File

@ -275,6 +275,11 @@ public:
/// applies even if full unrolling is selected. This allows a target to fall
/// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
unsigned FullUnrollMaxCount;
// Represents number of instructions optimized when "back edge"
// becomes "fall through" in unrolled loop.
// For now we count a conditional branch on a backedge and a comparison
// feeding it.
unsigned BEInsns;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;

View File

@ -285,6 +285,10 @@ public:
// Avoid unrolling when optimizing for size.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
// Set number of instructions optimized when "back edge"
// becomes "fall through" to default value of 2.
UP.BEInsns = 2;
}
/// @}

View File

@ -126,6 +126,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;
UP.BEInsns = 2;
UP.Partial = false;
UP.Runtime = false;
UP.AllowRemainder = true;
@ -541,7 +542,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
AssumptionCache *AC) {
AssumptionCache *AC, unsigned BEInsns) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
@ -560,7 +561,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
LoopSize = std::max(LoopSize, 3u);
LoopSize = std::max(LoopSize, BEInsns + 1);
return LoopSize;
}
@ -699,6 +700,14 @@ static bool canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
// Returns loop size estimation for unrolled loop.
static uint64_t getUnrolledLoopSize(
unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP) {
assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
static bool computeUnrollCount(
@ -706,11 +715,6 @@ static bool computeUnrollCount(
ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// BEInsns represents number of instructions optimized when "back edge"
// becomes "fall through" in unrolled loop.
// For now we count a conditional branch on a backedge and a comparison
// feeding it.
unsigned BEInsns = 2;
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@ -718,8 +722,7 @@ static bool computeUnrollCount(
UP.Count = UnrollCount;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if (UP.AllowRemainder &&
(LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
return true;
}
@ -731,13 +734,13 @@ static bool computeUnrollCount(
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if (UP.AllowRemainder &&
(LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
if (PragmaFullUnroll && TripCount != 0) {
UP.Count = TripCount;
if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return false;
}
@ -745,8 +748,6 @@ static bool computeUnrollCount(
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
uint64_t UnrolledSize;
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits. Set thresholds to at least the PragmaThreshold value
@ -768,17 +769,16 @@ static bool computeUnrollCount(
assert((ExactTripCount == 0 || MaxTripCount == 0) &&
"ExtractTripCound and MaxTripCount cannot both be non zero.");
unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
UP.Count = FullUnrollTripCount;
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
UnrolledSize =
(uint64_t)(LoopSize - BEInsns) * FullUnrollTripCount + BEInsns;
if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
getUnrolledLoopSize(LoopSize, UP),
getUnrolledLoopSize(LoopSize, UP))) {
UseUpperBound = (MaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
UP.Count = TripCount;
return ExplicitUnroll;
} else {
// The loop isn't that small, but we still can fully unroll it if that
@ -794,7 +794,6 @@ static bool computeUnrollCount(
UseUpperBound = (MaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
UP.Count = TripCount;
return ExplicitUnroll;
}
}
@ -814,10 +813,10 @@ static bool computeUnrollCount(
UP.Count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
if (UnrolledSize > UP.PartialThreshold)
UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
(LoopSize - BEInsns);
if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count =
(std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
(LoopSize - UP.BEInsns);
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
while (UP.Count != 0 && TripCount % UP.Count != 0)
@ -828,11 +827,9 @@ static bool computeUnrollCount(
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
UP.Count = UP.DefaultUnrollRuntimeCount;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
while (UP.Count != 0 &&
getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count >>= 1;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
}
}
if (UP.Count < 2) {
if (PragmaEnableUnroll)
@ -881,14 +878,12 @@ static bool computeUnrollCount(
}
if (UP.Count == 0)
UP.Count = UP.DefaultUnrollRuntimeCount;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
while (UP.Count != 0 &&
getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count >>= 1;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
}
#ifndef NDEBUG
unsigned OrigCount = UP.Count;
@ -944,8 +939,11 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
ProvidedRuntime, ProvidedUpperBound);
unsigned LoopSize = ApproximateLoopSize(
L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NotDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
@ -977,10 +975,6 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
ProvidedRuntime, ProvidedUpperBound);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
return false;