[DSE] Only eliminate candidates that always store the same loc.

AliasAnalysis/MemoryLocation does not account for loops. Two
MemoryLocations can be reported as must-overwrite, even if the first
store writes to multiple locations in a loop.

This patch prevents removing such stores, by only considering candidates
that are known to be loop invariant, or executed in the same BB.

Currently the invariant check is quite conservative and only considers
Alloca and Alloca-like instructions and arguments as invariant base pointers.
It also considers GEPs with all constant indices and invariant bases as
invariant.

This can be improved in the future, but the current implementation has
only minor impact on the total number of stores eliminated (25903 vs
26047 for the baseline). There are some 2-10% swings for some individual
benchmarks. In roughly half of the cases, the number of stores removed
actually increases, because candidates that are unlikely to be valid
are skipped early.
This commit is contained in:
Florian Hahn 2020-09-14 11:49:27 +01:00
parent eef30334d1
commit f715d81c9d
2 changed files with 39 additions and 0 deletions

View File

@ -1861,6 +1861,32 @@ struct DSEState {
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
/// Conservatively determines whether \p Ptr is loop invariant with respect to
/// every possible loop, i.e. whether it refers to one single MemoryLocation
/// for the whole execution of the containing function.
///
/// After stripping pointer casts, \p Ptr is accepted if it is either
///  - an invariant base pointer, or
///  - a GEP whose pointer operand is an invariant base pointer and whose
///    indices are all constant.
///
/// A base pointer counts as invariant if, after stripping pointer casts, it
/// is not an instruction at all (for example a function argument), or it is
/// an alloca or an alloc-like call. Every other instruction is rejected.
bool IsGuaranteedLoopInvariant(Value *Ptr) {
  // Decides whether a (possibly cast) base pointer is known to be invariant.
  auto HasInvariantBase = [this](Value *Base) -> bool {
    Value *StrippedBase = Base->stripPointerCasts();
    auto *BaseInst = dyn_cast<Instruction>(StrippedBase);
    // Anything that is not an instruction is treated as invariant.
    if (!BaseInst)
      return true;
    // Among instructions, only allocas and alloc-like calls are accepted.
    return isa<AllocaInst>(BaseInst) || isAllocLikeFn(BaseInst, &TLI);
  };

  Value *Stripped = Ptr->stripPointerCasts();
  auto *GEP = dyn_cast<GEPOperator>(Stripped);
  // No GEP involved: the pointer itself has to be an invariant base.
  if (!GEP)
    return HasInvariantBase(Stripped);

  // For a GEP, the base must be invariant and the offset must be fixed.
  if (!HasInvariantBase(GEP->getPointerOperand()))
    return false;
  return GEP->hasAllConstantIndices();
}
// Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
// no read access between them or on any other path to a function exit block
// if \p DefLoc is not accessible after the function returns. If there is no
@ -1992,6 +2018,17 @@ struct DSEState {
}
continue;
} else {
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
// memory location and not multiple locations in a loop.
if (Current->getBlock() != KillingDef->getBlock() &&
!IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
StepAgain = true;
Current = CurrentDef->getDefiningAccess();
WalkerStepLimit -= 1;
continue;
}
int64_t InstWriteOffset, DepWriteOffset;
auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
DepWriteOffset, InstWriteOffset, BatchAA, &F);

View File

@ -111,6 +111,7 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias
; CHECK: for.body4.lr.ph:
; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
@ -327,6 +328,7 @@ define i16 @test_loop_carried_dep() {
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[IF_END10:%.*]], label [[IF_END]]
; CHECK: if.end: