Revert "[DSE] Remove stores in the same loop iteration"

Apparently non-dead stores are being removed, as noted in D100464.

This reverts commit 222aeb4d51.
This commit is contained in:
David Green 2021-06-08 21:23:08 +01:00
parent a7142f5c91
commit 297088d1ad
7 changed files with 26 additions and 65 deletions

View File

@ -40,12 +40,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@ -855,11 +853,6 @@ struct DSEState {
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
const LoopInfo &LI;
// Whether the function contains any irreducible control flow, useful for
// being accurately able to detect loops.
bool ContainsIrreducibleLoops;
// All MemoryDefs that potentially could kill other MemDefs.
SmallVector<MemoryDef *, 64> MemDefs;
@ -883,15 +876,14 @@ struct DSEState {
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
: F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
DL(F.getParent()->getDataLayout()), LI(LI) {}
DL(F.getParent()->getDataLayout()) {}
static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
const TargetLibraryInfo &TLI, const LoopInfo &LI) {
DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
const TargetLibraryInfo &TLI) {
DSEState State(F, AA, MSSA, DT, PDT, TLI);
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@ -919,9 +911,6 @@ struct DSEState {
State.InvisibleToCallerAfterRet.insert({&AI, true});
}
// Collect whether there is any irreducible control flow in the function.
State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
return State;
}
@ -936,12 +925,6 @@ struct DSEState {
isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
const MemoryLocation &Later, const MemoryLocation &Earlier,
int64_t &EarlierOff, int64_t &LaterOff) {
// AliasAnalysis does not always account for loops. Limit overwrite checks
// to dependencies for which we can guarantee they are independant of any
// loops they are in.
if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
return OW_Unknown;
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
@ -1267,27 +1250,11 @@ struct DSEState {
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
/// Returns true if a dependency between \p Current and \p KillingDef is
/// guaranteed to be loop invariant for the loops that they are in. Either
/// because they are known to be in the same block, in the same loop level or
/// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
/// during execution of the containing function.
bool isGuaranteedLoopIndependent(const Instruction *Current,
const Instruction *KillingDef,
const MemoryLocation &CurrentLoc) {
// If the dependency is within the same block or loop level (being careful of
// irreducible loops), we know that AA will return a valid result for the
// memory dependency. (Both at the function level, outside of any loop,
// would also be valid but we currently disable that to limit compile time).
if (Current->getParent() == KillingDef->getParent())
return true;
const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
if (!ContainsIrreducibleLoops && CurrentLI &&
CurrentLI == LI.getLoopFor(KillingDef->getParent()))
return true;
// Otherwise check the memory location is invariant to any loops.
auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
/// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
/// loop. In particular, this guarantees that it only references a single
/// MemoryLocation during execution of the containing function.
bool IsGuaranteedLoopInvariant(Value *Ptr) {
auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
Ptr = Ptr->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
if (isa<AllocaInst>(Ptr))
@ -1301,7 +1268,7 @@ struct DSEState {
return true;
};
const Value *Ptr = CurrentLoc.Ptr->stripPointerCasts();
Ptr = Ptr->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
if (I->getParent()->isEntryBlock())
return true;
@ -1431,9 +1398,9 @@ struct DSEState {
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
// memory location and not located in different loops.
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
// memory location and not multiple locations in a loop.
if (Current->getBlock() != KillingDef->getBlock() &&
!IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
StepAgain = true;
Current = CurrentDef->getDefiningAccess();
WalkerStepLimit -= 1;
@ -1869,13 +1836,12 @@ struct DSEState {
}
};
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
const TargetLibraryInfo &TLI,
const LoopInfo &LI) {
bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
const TargetLibraryInfo &TLI) {
bool MadeChange = false;
DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
@ -2048,9 +2014,8 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@ -2064,7 +2029,6 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<MemorySSAAnalysis>();
PA.preserve<LoopAnalysis>();
return PA;
}
@ -2090,9 +2054,8 @@ public:
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@ -2114,8 +2077,6 @@ public:
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
};
@ -2132,7 +2093,6 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)

View File

@ -517,8 +517,8 @@
; GCN-O2-NEXT: Function Alias Analysis Results
; GCN-O2-NEXT: Memory SSA
; GCN-O2-NEXT: MemCpy Optimization
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Dead Store Elimination
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Canonicalize natural loops
; GCN-O2-NEXT: LCSSA Verifier
; GCN-O2-NEXT: Loop-Closed SSA Form Pass
@ -877,8 +877,8 @@
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Memory SSA
; GCN-O3-NEXT: MemCpy Optimization
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Dead Store Elimination
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Canonicalize natural loops
; GCN-O3-NEXT: LCSSA Verifier
; GCN-O3-NEXT: Loop-Closed SSA Form Pass

View File

@ -162,8 +162,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass

View File

@ -167,8 +167,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass

View File

@ -167,8 +167,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass

View File

@ -148,8 +148,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass

View File

@ -111,6 +111,7 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias
; CHECK: for.body4.lr.ph:
; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4: