From 297088d1add70cae554c8f96dde3a97a3e8d56a5 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 8 Jun 2021 21:23:08 +0100 Subject: [PATCH] Revert "[DSE] Remove stores in the same loop iteration" Apparently non-dead stores are being removed, as noted in D100464. This reverts commit 222aeb4d51a46c5a81c9e4ccb16d1d19dd21ec95. --- .../Scalar/DeadStoreElimination.cpp | 78 +++++-------------- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll | 4 +- llvm/test/Other/opt-O2-pipeline.ll | 2 +- .../Other/opt-O3-pipeline-enable-matrix.ll | 2 +- llvm/test/Other/opt-O3-pipeline.ll | 2 +- llvm/test/Other/opt-Os-pipeline.ll | 2 +- .../DeadStoreElimination/multiblock-loops.ll | 1 + 7 files changed, 26 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index e6b9c2a0e580..94f3e0d3464f 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,12 +40,10 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -855,11 +853,6 @@ struct DSEState { PostDominatorTree &PDT; const TargetLibraryInfo &TLI; const DataLayout &DL; - const LoopInfo &LI; - - // Whether the function contains any irreducible control flow, useful for - // being accurately able to detect loops. - bool ContainsIrreducibleLoops; // All MemoryDefs that potentially could kill other MemDefs. SmallVector MemDefs; @@ -883,15 +876,14 @@ struct DSEState { DenseMap IOLs; DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, - PostDominatorTree &PDT, const TargetLibraryInfo &TLI, - const LoopInfo &LI) + PostDominatorTree &PDT, const TargetLibraryInfo &TLI) : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()), LI(LI) {} + DL(F.getParent()->getDataLayout()) {} static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI, const LoopInfo &LI) { - DSEState State(F, AA, MSSA, DT, PDT, TLI, LI); + const TargetLibraryInfo &TLI) { + DSEState State(F, AA, MSSA, DT, PDT, TLI); // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; @@ -919,9 +911,6 @@ struct DSEState { State.InvisibleToCallerAfterRet.insert({&AI, true}); } - // Collect whether there is any irreducible control flow in the function. - State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI); - return State; } @@ -936,12 +925,6 @@ struct DSEState { isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, const MemoryLocation &Later, const MemoryLocation &Earlier, int64_t &EarlierOff, int64_t &LaterOff) { - // AliasAnalysis does not always account for loops. Limit overwrite checks - // to dependencies for which we can guarantee they are independant of any - // loops they are in. - if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier)) - return OW_Unknown; - // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { @@ -1267,27 +1250,11 @@ struct DSEState { return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); } - /// Returns true if a dependency between \p Current and \p KillingDef is - /// guaranteed to be loop invariant for the loops that they are in. Either - /// because they are known to be in the same block, in the same loop level or - /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation - /// during execution of the containing function. - bool isGuaranteedLoopIndependent(const Instruction *Current, - const Instruction *KillingDef, - const MemoryLocation &CurrentLoc) { - // If the dependency is within the same block or loop level (being careful of - // irreducible loops), we know that AA will return a valid result for the - // memory dependency. (Both at the function level, outside of any loop, - // would also be valid but we currently disable that to limit compile time). - if (Current->getParent() == KillingDef->getParent()) - return true; - const Loop *CurrentLI = LI.getLoopFor(Current->getParent()); - if (!ContainsIrreducibleLoops && CurrentLI && - CurrentLI == LI.getLoopFor(KillingDef->getParent())) - return true; - - // Otherwise check the memory location is invariant to any loops. - auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) { + /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible + /// loop. In particular, this guarantees that it only references a single + /// MemoryLocation during execution of the containing function. + bool IsGuaranteedLoopInvariant(Value *Ptr) { + auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) { Ptr = Ptr->stripPointerCasts(); if (auto *I = dyn_cast(Ptr)) { if (isa(Ptr)) @@ -1301,7 +1268,7 @@ struct DSEState { return true; }; - const Value *Ptr = CurrentLoc.Ptr->stripPointerCasts(); + Ptr = Ptr->stripPointerCasts(); if (auto *I = dyn_cast(Ptr)) { if (I->getParent()->isEntryBlock()) return true; @@ -1431,9 +1398,9 @@ struct DSEState { // AliasAnalysis does not account for loops. Limit elimination to // candidates for which we can guarantee they always store to the same - // memory location and not located in different loops. - if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) { - LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n"); + // memory location and not multiple locations in a loop. + if (Current->getBlock() != KillingDef->getBlock() && + !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { StepAgain = true; Current = CurrentDef->getDefiningAccess(); WalkerStepLimit -= 1; @@ -1869,13 +1836,12 @@ struct DSEState { } }; -static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, - DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI, - const LoopInfo &LI) { +bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, + DominatorTree &DT, PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { bool MadeChange = false; - DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI); + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); // For each store: for (unsigned I = 0; I < State.MemDefs.size(); I++) { MemoryDef *KillingDef = State.MemDefs[I]; @@ -2048,9 +2014,8 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); MemorySSA &MSSA = AM.getResult(F).getMSSA(); PostDominatorTree &PDT = AM.getResult(F); - LoopInfo &LI = AM.getResult(F); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2064,7 +2029,6 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); - PA.preserve(); return PA; } @@ -2090,9 +2054,8 @@ public: MemorySSA &MSSA = getAnalysis().getMSSA(); PostDominatorTree &PDT = getAnalysis().getPostDomTree(); - LoopInfo &LI = getAnalysis().getLoopInfo(); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2114,8 +2077,6 @@ public: AU.addRequired(); AU.addPreserved(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); } }; @@ -2132,7 +2093,6 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, false) diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index 25051b885f9d..021012aff54d 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -517,8 +517,8 @@ ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: MemCpy Optimization -; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Dead Store Elimination +; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Canonicalize natural loops ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass @@ -877,8 +877,8 @@ ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: MemCpy Optimization -; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Dead Store Elimination +; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Canonicalize natural loops ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 542a79820e4f..36370d6cd6d3 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -162,8 +162,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index 4c5e126f229f..c39361dcc8a2 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -167,8 +167,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 97b11fec08b9..af77e09c2496 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -167,8 +167,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index b65e550ce9eb..e24d3d3f57bf 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -148,8 +148,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Dead Store Elimination +; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll index b1878084f5fd..e887a478120f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll @@ -111,6 +111,7 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias ; CHECK: for.body4.lr.ph: ; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.body4: