[NewPM] Add ExtraVectorizerPasses support

As it looks like NewPM generally is using SimpleLoopUnswitch instead of LoopUnswitch, this patch also use SimpleLoopUnswitch in the ExtraVectorizerPasses sequence (compared with LegacyPM which use the LoopUnswitch pass). Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D95457
2021-01-23 00:54:04 +01:00 · 2021-01-23 00:54:04 +01:00 · a9bd3d37bd
parent 5f1d4d4779
commit a9bd3d37bd
3 changed files with 53 additions and 4 deletions
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@ -287,6 +287,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
  MergeFunctions = false;
  UniqueLinkageNames = false;
 }
+extern cl::opt<bool> ExtraVectorizerPasses;

 extern cl::opt<bool> EnableConstraintElimination;
 extern cl::opt<bool> EnableGVNHoist;
@ -1255,6 +1256,28 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
  // Cleanup after the loop optimization passes.
  OptimizePM.addPass(InstCombinePass());

+  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+    // At higher optimization levels, try to clean up any runtime overlap and
+    // alignment checks inserted by the vectorizer. We want to track correlated
+    // runtime checks for two inner loops in the same outer loop, fold any
+    // common computations, hoist loop-invariant aspects out of any outer loop,
+    // and unswitch the runtime checks if possible. Once hoisted, we may have
+    // dead (or speculatable) control flows or more combining opportunities.
+    OptimizePM.addPass(EarlyCSEPass());
+    OptimizePM.addPass(CorrelatedValuePropagationPass());
+    OptimizePM.addPass(InstCombinePass());
+    LoopPassManager LPM(DebugLogging);
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(
+        SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
+    OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+        std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
+        DebugLogging));
+    OptimizePM.addPass(SimplifyCFGPass());
+    OptimizePM.addPass(InstCombinePass());
+  }
+
  // Now that we've formed fast to execute loop structures, we do further
  // optimizations. These are run afterward as they might block doing complex
  // analyses and transforms such as what are needed for loop vectorization.
@ -1274,8 +1297,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
                                         .sinkCommonInsts(true)));

  // Optimize parallel scalar instruction chains into SIMD instructions.
-  if (PTO.SLPVectorization)
+  if (PTO.SLPVectorization) {
    OptimizePM.addPass(SLPVectorizerPass());
+    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+      OptimizePM.addPass(EarlyCSEPass());
+    }
+  }

  // Enhance/cleanup vector code.
  OptimizePM.addPass(VectorCombinePass());
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@ -60,7 +60,7 @@ UseGVNAfterVectorization("use-gvn-after-vectorization",
  cl::init(false), cl::Hidden,
  cl::desc("Run GVN instead of Early CSE after vectorization passes"));

-static cl::opt<bool> ExtraVectorizerPasses(
+cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization."));

--- a/llvm/test/Other/opt-pipeline-vector-passes.ll
+++ b/llvm/test/Other/opt-pipeline-vector-passes.ll
@ -5,6 +5,7 @@
 ; RUN: opt -enable-new-pm=0 -O2 -vectorize-loops=0       -debug-pass=Structure  < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O1>' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O1
 ; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2
+; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2_EXTRA

 ; REQUIRES: asserts

@ -64,6 +65,27 @@
 ; NEWPM_O2:        Running pass: SLPVectorizerPass
 ; NEWPM_O2:        Running pass: VectorCombinePass

-define void @f() {
-  ret void
+; NEWPM_O2_EXTRA-LABEL: Running pass: LoopVectorizePass
+; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass
+; NEWPM_O2_EXTRA: Running pass: CorrelatedValuePropagationPass
+; NEWPM_O2_EXTRA: Running pass: InstCombinePass
+; NEWPM_O2_EXTRA: Running pass: LICMPass
+; NEWPM_O2_EXTRA: Running pass: SimpleLoopUnswitchPass
+; NEWPM_O2_EXTRA: Running pass: SimplifyCFGPass
+; NEWPM_O2_EXTRA: Running pass: InstCombinePass
+; NEWPM_O2_EXTRA: Running pass: SLPVectorizerPass
+; NEWPM_O2_EXTRA: Running pass: EarlyCSEPass
+; NEWPM_O2_EXTRA: Running pass: VectorCombinePass
+
+define i64 @f(i1 %cond) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i64 %i, 1
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret i64 %i
 }