forked from OSchip/llvm-project
[Passes] Run peeling as part of simple/full loop unrolling.
Loop peeling removes conditions from loop bodies that become invariant after a small number of iterations. When triggered, this leads to fewer compares and possibly PHIs in loop bodies, enabling further optimizations. The current cost-model of loop peeling should be quite conservative/safe, i.e. only peel if a condition in the loop becomes known after peeling.

For example, see PR47671, where loop peeling enables vectorization by removing a PHI the vectorizer does not understand. Granted, the loop-vectorizer could also be taught about constant PHIs, but loop peeling is likely to enable other optimizations as well.

This has an impact on quite a few benchmarks from MultiSource/SPEC2000/SPEC2006 on X86 with -O3 -flto, for example:

Same hash: 186 (filtered out)
Remaining: 51
Metric: loop-vectorize.LoopsVectorized

Program                                         base     patch    diff
test-suite...ve-susan/automotive-susan.test     8.00     9.00     12.5%
test-suite...nal/skidmarks10/skidmarks.test     35.00    31.00    -11.4%
test-suite...lications/sqlite3/sqlite3.test     41.00    43.00    4.9%
test-suite...s/ASC_Sequoia/AMGmk/AMGmk.test     25.00    26.00    4.0%
test-suite...006/450.soplex/450.soplex.test     88.00    89.00    1.1%
test-suite...TimberWolfMC/timberwolfmc.test     120.00   119.00   -0.8%
test-suite.../CINT2006/403.gcc/403.gcc.test     215.00   216.00   0.5%
test-suite...006/447.dealII/447.dealII.test     957.00   958.00   0.1%
test-suite...ternal/HMMER/hmmcalibrate.test     75.00    75.00    0.0%

Same hash: 186 (filtered out)
Remaining: 51
Metric: loop-vectorize.LoopsAnalyzed

Program                                         base     patch    diff
test-suite...ks/Prolangs-C/agrep/agrep.test     440.00   434.00   -1.4%
test-suite...nal/skidmarks10/skidmarks.test     312.00   308.00   -1.3%
test-suite...marks/7zip/7zip-benchmark.test     6399.00  6323.00  -1.2%
test-suite...lications/minisat/minisat.test     134.00   135.00   0.7%
test-suite...rks/FreeBench/pifft/pifft.test     295.00   297.00   0.7%
test-suite...TimberWolfMC/timberwolfmc.test     1879.00  1869.00  -0.5%
test-suite...pplications/treecc/treecc.test     689.00   691.00   0.3%
test-suite...T2000/300.twolf/300.twolf.test     1593.00  1597.00  0.3%
test-suite.../Benchmarks/Bullet/bullet.test     1394.00  1392.00  -0.1%
test-suite...ications/JM/ldecod/ldecod.test     1431.00  1429.00  -0.1%
test-suite...6/464.h264ref/464.h264ref.test     2229.00  2230.00  0.0%
test-suite...lications/sqlite3/sqlite3.test     2590.00  2589.00  -0.0%
test-suite...ications/JM/lencod/lencod.test     2732.00  2733.00  0.0%
test-suite...006/453.povray/453.povray.test     3395.00  3394.00  -0.0%

Note the -11% regression in number of loops vectorized for skidmarks. I suspect this corresponds to the fact that those loops are gone now (see the reduction in number of loops analyzed by LV).

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D88471
This commit is contained in:
parent
4dc110a4b8
commit
35b3989a30
|
@ -190,7 +190,8 @@ Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
|
|||
int Count = -1, int AllowPartial = -1,
|
||||
int Runtime = -1, int UpperBound = -1,
|
||||
int AllowPeeling = -1);
|
||||
// Create an unrolling pass for full unrolling that uses exact trip count only.
|
||||
// Create an unrolling pass for full unrolling that uses exact trip count only
|
||||
// and also does peeling.
|
||||
Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
|
||||
bool ForgetAllSCEV = false);
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ class Function;
|
|||
class Loop;
|
||||
class LPMUpdater;
|
||||
|
||||
/// Loop unroll pass that only does full loop unrolling.
|
||||
/// Loop unroll pass that only does full loop unrolling and peeling.
|
||||
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
|
||||
const int OptLevel;
|
||||
|
||||
|
|
|
@ -458,7 +458,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
|
|||
if (EnableLoopInterchange)
|
||||
MPM.add(createLoopInterchangePass()); // Interchange loops
|
||||
|
||||
// Unroll small loops
|
||||
// Unroll small loops and perform peeling.
|
||||
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
|
||||
ForgetAllSCEVInLoopUnroll));
|
||||
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
|
||||
|
@ -1072,7 +1072,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
|
|||
if (EnableConstraintElimination)
|
||||
PM.add(createConstraintEliminationPass());
|
||||
|
||||
// Unroll small loops
|
||||
// Unroll small loops and perform peeling.
|
||||
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
|
||||
ForgetAllSCEVInLoopUnroll));
|
||||
PM.add(createLoopDistributePass());
|
||||
|
|
|
@ -1301,7 +1301,7 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
|
|||
Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
|
||||
bool ForgetAllSCEV) {
|
||||
return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
|
||||
0, 0, 0, 0);
|
||||
0, 0, 0, 1);
|
||||
}
|
||||
|
||||
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
||||
|
@ -1329,7 +1329,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
|||
OnlyWhenForced, ForgetSCEV, /*Count*/ None,
|
||||
/*Threshold*/ None, /*AllowPartial*/ false,
|
||||
/*Runtime*/ false, /*UpperBound*/ false,
|
||||
/*AllowPeeling*/ false,
|
||||
/*AllowPeeling*/ true,
|
||||
/*AllowProfileBasedPeeling*/ false,
|
||||
/*FullUnrollMaxCount*/ None) !=
|
||||
LoopUnrollResult::Unmodified;
|
||||
|
|
|
@ -11,7 +11,9 @@ target triple = "x86_64-apple-macosx"
|
|||
|
||||
define i32 @test(i32* readonly %p, i32* readnone %q) {
|
||||
; CHECK-LABEL: define i32 @test(
|
||||
; CHECK-NOT: vector.body
|
||||
; CHECK: vector.body:
|
||||
; CHECK: %index.next = add i64 %index, 8
|
||||
; CHECK: middle.block:
|
||||
;
|
||||
entry:
|
||||
%cmp.not7 = icmp eq i32* %p, %q
|
||||
|
|
Loading…
Reference in New Issue