[LICM&MSSA] Limit store hoisting.

Summary: If there is no clobbering access for a store inside the loop, that store can only be hoisted if there are no interfearing loads. A more general verification introduced here: there are no loads that are not optimized to an access outside the loop. Addresses PR40586. Reviewers: george.burgess.iv Subscribers: sanjoy, jlebar, Prazek, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D57967 llvm-svn: 353734
2019-02-11 19:07:15 +00:00 · 2019-02-11 19:07:15 +00:00 · 605b21739d
parent 4b86c474ff
commit 605b21739d
4 changed files with 81 additions and 21 deletions
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@ -112,7 +112,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                OptimizationRemarkEmitter *ORE);
+                bool, OptimizationRemarkEmitter *);

 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
@ -124,8 +124,8 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
 /// ORE. It returns changed status.
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, Loop *, AliasSetTracker *,
-                 MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                 OptimizationRemarkEmitter *ORE);
+                 MemorySSAUpdater *, ICFLoopSafetyInfo *, bool,
+                 OptimizationRemarkEmitter *);

 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
@ -276,6 +276,7 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                        Loop *CurLoop, AliasSetTracker *CurAST,
                        MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
+                        bool NoOfMemAccessesTooLarge,
                        OptimizationRemarkEmitter *ORE = nullptr);

 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@ -306,7 +306,8 @@ bool LoopInvariantCodeMotion::runOnLoop(

  std::unique_ptr<AliasSetTracker> CurAST;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
-  bool LocalDisablePromotion = false;
+  bool NoOfMemAccTooLarge = false;
+
  if (!MSSA) {
    LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
    CurAST = collectAliasInfoForLoop(L, LI, AA);
@ -321,12 +322,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
          (void)MA;
          AccessCapCount++;
          if (AccessCapCount > AccessCapForMSSAPromotion) {
-            LocalDisablePromotion = true;
+            NoOfMemAccTooLarge = true;
            break;
          }
        }
      }
-      if (LocalDisablePromotion)
+      if (NoOfMemAccTooLarge)
        break;
    }
  }
@ -350,10 +351,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
  //
  if (L->hasDedicatedExits())
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
-                          CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+                          CurAST.get(), MSSAU.get(), &SafetyInfo,
+                          NoOfMemAccTooLarge, ORE);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+                           CurAST.get(), MSSAU.get(), &SafetyInfo,
+                           NoOfMemAccTooLarge, ORE);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
@ -363,7 +366,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
  // preheader for SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
-      !LocalDisablePromotion) {
+      !NoOfMemAccTooLarge) {
    // Figure out the loop exits and their insertion points
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);
@ -457,7 +460,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI,
                      TargetTransformInfo *TTI, Loop *CurLoop,
                      AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                      ICFLoopSafetyInfo *SafetyInfo,
+                      ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
                      OptimizationRemarkEmitter *ORE) {

  // Verify inputs.
@ -501,7 +504,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
      //
      bool FreeInLoop = false;
      if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+                             NoOfMemAccTooLarge, ORE) &&
          !I.mayHaveSideEffects()) {
        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
          if (!FreeInLoop) {
@ -755,7 +759,7 @@ public:
 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                       ICFLoopSafetyInfo *SafetyInfo,
+                       ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
                       OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@ -808,7 +812,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
      // and we have accurately duplicated the control flow from the loop header
      // to that block.
      if (CurLoop->hasLoopInvariantOperands(&I) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+                             NoOfMemAccTooLarge, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, CurLoop, SafetyInfo, ORE,
              CurLoop->getLoopPreheader()->getTerminator())) {
@ -1035,6 +1040,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, AliasSetTracker *CurAST,
                              MemorySSAUpdater *MSSAU,
                              bool TargetExecutesOncePerLoop,
+                              bool NoOfMemAccTooLarge,
                              OptimizationRemarkEmitter *ORE) {
  // If we don't understand the instruction, bail early.
  if (!isHoistableAndSinkableInst(I))
@ -1173,9 +1179,28 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
        return true;
      if (!EnableLicmCap) {
        auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
-        if (MSSA->isLiveOnEntryDef(Source) ||
-            !CurLoop->contains(Source->getBlock()))
-          return true;
+        // If there are no clobbering Defs in the loop, we still need to check
+        // for interfering Uses. If there are more accesses than the Promotion
+        // cap, give up, we're not walking a list that long. Otherwise, walk the
+        // list, check each Use if it's optimized to an access outside the loop.
+        // If yes, store is safe to hoist. This is fairly restrictive, but
+        // conservatively correct.
+        // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+        // moving accesses. Can also extend to dominating uses.
+        if ((!MSSA->isLiveOnEntryDef(Source) &&
+             CurLoop->contains(Source->getBlock())) ||
+            NoOfMemAccTooLarge)
+          return false;
+        for (auto *BB : CurLoop->getBlocks())
+          if (auto *Accesses = MSSA->getBlockAccesses(BB))
+            for (const auto &MA : *Accesses)
+              if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+                auto *MD = MU->getDefiningAccess();
+                if (!MSSA->isLiveOnEntryDef(MD) &&
+                    CurLoop->contains(MD->getBlock()))
+                  return false;
+              }
+        return true;
      }
      return false;
    }
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@ -303,7 +303,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
    // No need to check for instruction's operands are loop invariant.
    assert(L.hasLoopInvariantOperands(I) &&
           "Insts in a loop's preheader should have loop invariant operands!");
-    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
+    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false, false))
      continue;
    if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
      Changed = true;
--- a/llvm/test/Transforms/LICM/store-hoisting.ll
+++ b/llvm/test/Transforms/LICM/store-hoisting.ll
@ -1,5 +1,6 @@
-; RUN: opt -S -basicaa -licm %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -S -basicaa -licm %s | FileCheck -check-prefixes=CHECK,AST %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck  -check-prefixes=CHECK,MSSA %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s

 define void @test(i32* %loc) {
 ; CHECK-LABEL: @test
@ -46,8 +47,11 @@ exit2:

 define i32* @false_negative_2use(i32* %loc) {
 ; CHECK-LABEL: @false_negative_2use
-; CHECK-LABEL: exit:
-; CHECK: store i32 0, i32* %loc
+; AST-LABEL: exit:
+; AST: store i32 0, i32* %loc
+; MSSA-LABEL: entry:
+; MSSA: store i32 0, i32* %loc
+; MSSA-LABEL: loop:
 entry:
  br label %loop

@ -119,6 +123,7 @@ exit:
  ret void
 }

+; Hoisting the store is actually valid here, as it dominates the load.
 define void @neg_ref(i32* %loc) {
 ; CHECK-LABEL: @neg_ref
 ; CHECK-LABEL: exit1:
@ -146,6 +151,35 @@ exit2:
  ret void
 }

+; Hoisting the store here leads to a miscompile.
+define void @neg_ref2(i32* %loc) {
+; CHECK-LABEL: @neg_ref2
+; CHECK-LABEL: exit1:
+; CHECK: store i32 0, i32* %loc
+; CHECK-LABEL: exit2:
+; CHECK: store i32 0, i32* %loc
+entry:
+  store i32 198, i32* %loc
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+  %v = load i32, i32* %loc
+  store i32 0, i32* %loc
+  %earlycnd = icmp eq i32 %v, 198
+  br i1 %earlycnd, label %exit1, label %backedge
+  
+backedge:
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit2
+
+exit1:
+  ret void
+exit2:
+  ret void
+}
+
 declare void @modref()

 define void @neg_modref(i32* %loc) {