[DSE] Use correct memory location for read clobber check
MSSA DSE starts at a killing store, finds an earlier store and then checks that the earlier store is not read along any paths (without being killed first). However, it uses the memory location of the killing store for that, not the earlier store that we're attempting to eliminate.

This has a number of problems:

* Mismatches between what BasicAA considers aliasing and what DSE considers an overwrite (even though both are correct in isolation) can result in miscompiles. This is PR48279, which D92045 tries to fix in a different way. The problem is that we're using a location from a store that is potentially not executed and thus may be UB, in which case analysis results can be arbitrary.

* Metadata on the killing store may be used to determine aliasing, but there is no guarantee that the metadata is valid, as the specific killing store may not be executed. Using the metadata on the earlier store is valid (it is the store we're removing, so on any execution where its removal may be observed, it must be executed).

* The location is imprecise. For full overwrites the killing store will always have a location that is larger or equal than the earlier access location, so it's beneficial to use the earlier access location. This is not the case for partial overwrites, in which case either location might be smaller. There is some room for improvement here.

Using the earlier access location means that we can no longer cache which accesses are read for a given killing store, as we may be querying different locations. However, it turns out that simply dropping the cache has no notable impact on compile-time.

Differential Revision: https://reviews.llvm.org/D93523
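For illustration, a minimal IR sketch of the metadata problem described above, distilled from the scoped-noalias test updated at the end of this diff (the function name and the scope metadata nodes are a plausible reconstruction, not copied verbatim from the test file). Assume %p1 != %p2 exactly when %c is true:

define void @example(i1 %c, i8* %p1, i8* %p2) {
  ; Earlier store: the candidate for elimination.
  store i8 0, i8* %p1
  ; May read %p1 whenever %c is false, i.e. whenever %p1 == %p2.
  %v = load i8, i8* %p2, !alias.scope !0
  br i1 %c, label %if, label %else

if:
  ; Killing store: its !noalias metadata is only valid on executions that
  ; reach this block. Judging the load by this store's location and metadata
  ; concludes "no read clobber" and wrongly removes the first store;
  ; judging by the earlier store's own location keeps it.
  store i8 1, i8* %p1, !noalias !0
  ret void

else:
  store i8 2, i8* %p1
  ret void
}

!0 = !{!1}
!1 = distinct !{!1, !2, !"scope"}
!2 = distinct !{!2, !"domain"}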
This commit is contained in:
parent 1c3a6671c2
commit 1f1145006b
@@ -1634,18 +1634,6 @@ struct DSEState {
   /// basic block.
   DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
 
-  struct CheckCache {
-    SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
-    SmallPtrSet<MemoryAccess *, 16> KnownReads;
-
-    bool isKnownNoRead(MemoryAccess *A) const {
-      return KnownNoReads.find(A) != KnownNoReads.end();
-    }
-    bool isKnownRead(MemoryAccess *A) const {
-      return KnownReads.find(A) != KnownReads.end();
-    }
-  };
-
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
       : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
@@ -1940,9 +1928,8 @@ struct DSEState {
   Optional<MemoryAccess *>
   getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                   const MemoryLocation &DefLoc, const Value *DefUO,
-                  CheckCache &Cache, unsigned &ScanLimit,
-                  unsigned &WalkerStepLimit, bool IsMemTerm,
-                  unsigned &PartialLimit) {
+                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
+                  bool IsMemTerm, unsigned &PartialLimit) {
     if (ScanLimit == 0 || WalkerStepLimit == 0) {
       LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
       return None;
@@ -1954,6 +1941,7 @@ struct DSEState {
     LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
 
     // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
+    Optional<MemoryLocation> CurrentLoc;
     do {
       StepAgain = false;
       LLVM_DEBUG({
@@ -2017,12 +2005,8 @@ struct DSEState {
       // clobber, bail out, as the path is not profitable. We skip this check
       // for intrinsic calls, because the code knows how to handle memcpy
       // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) &&
-          (Cache.KnownReads.contains(Current) ||
-           isReadClobber(DefLoc, CurrentI))) {
-        Cache.KnownReads.insert(Current);
+      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
         return None;
-      }
 
       // Quick check if there are direct uses that are read-clobbers.
       if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
@@ -2031,7 +2015,6 @@ struct DSEState {
                    isReadClobber(DefLoc, UseOrDef->getMemoryInst());
             return false;
           })) {
-        Cache.KnownReads.insert(Current);
         LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
         return None;
       }
@@ -2045,13 +2028,24 @@ struct DSEState {
       }
 
       // If Current does not have an analyzable write location, skip it
-      auto CurrentLoc = getLocForWriteEx(CurrentI);
+      CurrentLoc = getLocForWriteEx(CurrentI);
       if (!CurrentLoc) {
         StepAgain = true;
         Current = CurrentDef->getDefiningAccess();
         continue;
       }
 
+      // AliasAnalysis does not account for loops. Limit elimination to
+      // candidates for which we can guarantee they always store to the same
+      // memory location and not multiple locations in a loop.
+      if (Current->getBlock() != KillingDef->getBlock() &&
+          !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+        StepAgain = true;
+        Current = CurrentDef->getDefiningAccess();
+        WalkerStepLimit -= 1;
+        continue;
+      }
+
       if (IsMemTerm) {
         // If the killing def is a memory terminator (e.g. lifetime.end), check
         // the next candidate if the current Current does not write the same
@@ -2062,17 +2056,6 @@ struct DSEState {
         }
         continue;
       } else {
-        // AliasAnalysis does not account for loops. Limit elimination to
-        // candidates for which we can guarantee they always store to the same
-        // memory location and not multiple locations in a loop.
-        if (Current->getBlock() != KillingDef->getBlock() &&
-            !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
-          StepAgain = true;
-          Current = CurrentDef->getDefiningAccess();
-          WalkerStepLimit -= 1;
-          continue;
-        }
-
         int64_t InstWriteOffset, DepWriteOffset;
         auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
                               DepWriteOffset, InstWriteOffset, BatchAA, &F);
@@ -2133,18 +2116,6 @@ struct DSEState {
       }
       --ScanLimit;
       NumDomMemDefChecks++;
 
-      // Check if we already visited this access.
-      if (Cache.isKnownNoRead(UseAccess)) {
-        LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess
-                          << " is safe earlier.\n");
-        continue;
-      }
-      if (Cache.isKnownRead(UseAccess)) {
-        LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess
-                          << " has a read-clobber earlier.\n");
-        return None;
-      }
-      KnownNoReads.insert(UseAccess);
 
       if (isa<MemoryPhi>(UseAccess)) {
@@ -2172,7 +2143,7 @@ struct DSEState {
 
       // A memory terminator kills all preceeding MemoryDefs and all succeeding
       // MemoryAccesses. We do not have to check it's users.
-      if (isMemTerminator(DefLoc, KillingI, UseInst)) {
+      if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
         LLVM_DEBUG(
             dbgs()
             << " ... skipping, memterminator invalidates following accesses\n");
@@ -2187,19 +2158,13 @@ struct DSEState {
 
       if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
         LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
-        Cache.KnownReads.insert(UseAccess);
-        Cache.KnownReads.insert(StartAccess);
-        Cache.KnownReads.insert(EarlierAccess);
         return None;
       }
 
       // Uses which may read the original MemoryDef mean we cannot eliminate the
       // original MD. Stop walk.
-      if (isReadClobber(DefLoc, UseInst)) {
+      if (isReadClobber(*CurrentLoc, UseInst)) {
         LLVM_DEBUG(dbgs() << " ... found read clobber\n");
-        Cache.KnownReads.insert(UseAccess);
-        Cache.KnownReads.insert(StartAccess);
-        Cache.KnownReads.insert(EarlierAccess);
         return None;
       }
@@ -2223,7 +2188,7 @@ struct DSEState {
       //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
       //                  stores [0,1]
       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
-        if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) {
+        if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
           if (!isInvisibleToCallerAfterRet(DefUO) &&
               UseAccess != EarlierAccess) {
             BasicBlock *MaybeKillingBlock = UseInst->getParent();
@@ -2311,7 +2276,6 @@ struct DSEState {
 
     // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
    // potentially dead.
-    Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end());
     return {EarlierAccess};
   }
 
@@ -2550,7 +2514,6 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
 
     bool Shortend = false;
     bool IsMemTerm = State.isMemTerminatorInst(SI);
-    DSEState::CheckCache Cache;
     // Check if MemoryAccesses in the worklist are killed by KillingDef.
     for (unsigned I = 0; I < ToCheck.size(); I++) {
       Current = ToCheck[I];
@@ -2558,8 +2521,8 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
         continue;
 
       Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit,
-          WalkerStepLimit, IsMemTerm, PartialLimit);
+          KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
+          IsMemTerm, PartialLimit);
 
       if (!Next) {
         LLVM_DEBUG(dbgs() << " finished walk\n");
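Before the test changes below, a hedged sketch of the precision point from the commit message (a hypothetical function mirroring @test3 below, not one of the tests verbatim): the earlier store writes one byte, the intervening load reads a different byte, and the killing store overwrites both. With the killing store's two-byte location the load looks like a read clobber; with the earlier store's one-byte location it does not, so the earlier store can now be eliminated.

define void @sketch() {
  %a = alloca [2 x i8]
  %a0 = getelementptr [2 x i8], [2 x i8]* %a, i32 0, i32 0
  %a1 = getelementptr [2 x i8], [2 x i8]* %a, i32 0, i32 1
  ; Earlier store: one-byte location %a0.
  store i8 1, i8* %a0
  ; Reads only %a1: clobbers the killing store's two-byte location,
  ; but not the earlier store's one-byte location.
  %v = load i8, i8* %a1
  ; Killing store: fully overwrites %a, killing the store to %a0.
  store [2 x i8] zeroinitializer, [2 x i8]* %a
  ret void
}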
@@ -41,13 +41,13 @@ bb3:
 }
 
 ; Post-dominating store.
+; TODO: The memset can be shortened.
 define void @accessible_after_return_2(i32* noalias %P, i1 %c) {
 ; CHECK-LABEL: @accessible_after_return_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1

@@ -36,6 +36,8 @@ define i32 @test_out_of_bounds_store_nonlocal(i1 %c) {
 ; CHECK-NEXT:    [[D:%.*]] = alloca [1 x i32], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[D]], i64 0, i64 0
+; CHECK-NEXT:    store i32 10, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[FOR_BODY_1:%.*]], label [[FOR_END:%.*]]

@@ -67,13 +67,11 @@ end:
   ret void
 }
 
-; TODO: The store to %a0 is dead, because only %a1 is read later.
+; The store to %a0 is dead, because only %a1 is read later.
 define void @test3(i1 %c) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i8], align 1
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
 ; CHECK-NEXT:    [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
-; CHECK-NEXT:    store i8 1, i8* [[A0]], align 1
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1
@@ -102,10 +100,8 @@ else:
 define void @test4(i1 %c) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i8], align 1
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
 ; CHECK-NEXT:    [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
 ; CHECK-NEXT:    store i8 1, i8* [[A1]], align 1
-; CHECK-NEXT:    store i8 1, i8* [[A0]], align 1
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1

@@ -4,13 +4,13 @@
 ; Assume that %p1 != %p2 if and only if %c is true. In that case the noalias
 ; metadata is correct, but the first store cannot be eliminated, as it may be
 ; read-clobbered by the load.
-; TODO The store is incorrectly eliminated.
 define void @test(i1 %c, i8* %p1, i8* %p2) {
 ; CHECK-LABEL: @test(
+; CHECK-NEXT:    store i8 0, i8* [[P1:%.*]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1, !alias.scope !0
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    store i8 1, i8* [[P1:%.*]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 1, i8* [[P1]], align 1, !noalias !0
 ; CHECK-NEXT:    ret void
 ; CHECK:       else:
 ; CHECK-NEXT:    store i8 2, i8* [[P1]], align 1