[LICM&MSSA] Limit store hoisting.

Summary:
If there is no clobbering access for a store inside the loop, that store can only be hoisted if there are no interfering loads. A more general check is introduced here: there must be no loads that are not optimized to an access outside the loop. Addresses PR40586.

Reviewers: george.burgess.iv

Subscribers: sanjoy, jlebar, Prazek, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D57967

llvm-svn: 353734
parent 4b86c474ff
commit 605b21739d
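To see why the extra check matters, here is a hand-reduced IR sketch (illustrative only; the function name is made up, and the pattern mirrors the @neg_ref2 test added at the bottom of this patch). The store in the loop has no clobbering store inside the loop, yet hoisting it into the preheader would change the value the load observes on the first iteration, and the loop would then never exit:

define void @store_load_interference(i32* %p) {
entry:
  store i32 198, i32* %p
  br label %loop

loop:
  ; If the store below were hoisted to %entry, %v would read 0 instead of 198
  ; on the first iteration and the exit branch would never be taken.
  %v = load i32, i32* %p
  store i32 0, i32* %p
  %earlycnd = icmp eq i32 %v, 198
  br i1 %earlycnd, label %exit, label %loop

exit:
  ret void
}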
@@ -112,7 +112,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                OptimizationRemarkEmitter *ORE);
+                bool, OptimizationRemarkEmitter *);

 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
@@ -124,8 +124,8 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
 /// ORE. It returns changed status.
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
-                 MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                 OptimizationRemarkEmitter *ORE);
+                 MemorySSAUpdater *, ICFLoopSafetyInfo *, bool,
+                 OptimizationRemarkEmitter *);

 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
@@ -276,6 +276,7 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                         Loop *CurLoop, AliasSetTracker *CurAST,
                         MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
+                        bool NoOfMemAccessesTooLarge,
                         OptimizationRemarkEmitter *ORE = nullptr);

 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
@@ -306,7 +306,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
   std::unique_ptr<AliasSetTracker> CurAST;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
   bool LocalDisablePromotion = false;
+  bool NoOfMemAccTooLarge = false;

   if (!MSSA) {
     LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
     CurAST = collectAliasInfoForLoop(L, LI, AA);
@@ -321,12 +322,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
           (void)MA;
           AccessCapCount++;
           if (AccessCapCount > AccessCapForMSSAPromotion) {
-            LocalDisablePromotion = true;
+            NoOfMemAccTooLarge = true;
             break;
           }
         }
       }
-      if (LocalDisablePromotion)
+      if (NoOfMemAccTooLarge)
         break;
     }
   }
@@ -350,10 +351,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
   //
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
-                          CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+                          CurAST.get(), MSSAU.get(), &SafetyInfo,
+                          NoOfMemAccTooLarge, ORE);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+                           CurAST.get(), MSSAU.get(), &SafetyInfo,
+                           NoOfMemAccTooLarge, ORE);

   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
@@ -363,7 +366,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
   // preheader for SSA updater, so also avoid sinking when no preheader
   // is available.
   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
-      !LocalDisablePromotion) {
+      !NoOfMemAccTooLarge) {
     // Figure out the loop exits and their insertion points
     SmallVector<BasicBlock *, 8> ExitBlocks;
     L->getUniqueExitBlocks(ExitBlocks);
@@ -457,7 +460,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       DominatorTree *DT, TargetLibraryInfo *TLI,
                       TargetTransformInfo *TTI, Loop *CurLoop,
                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                      ICFLoopSafetyInfo *SafetyInfo,
+                      ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
                       OptimizationRemarkEmitter *ORE) {

   // Verify inputs.
@@ -501,7 +504,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
     //
     bool FreeInLoop = false;
     if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
-        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+                           NoOfMemAccTooLarge, ORE) &&
         !I.mayHaveSideEffects()) {
       if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
         if (!FreeInLoop) {
@@ -755,7 +759,7 @@ public:
 bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                        AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
-                       ICFLoopSafetyInfo *SafetyInfo,
+                       ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
                        OptimizationRemarkEmitter *ORE) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@@ -808,7 +812,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
     // and we have accurately duplicated the control flow from the loop header
     // to that block.
     if (CurLoop->hasLoopInvariantOperands(&I) &&
-        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
+                           NoOfMemAccTooLarge, ORE) &&
         isSafeToExecuteUnconditionally(
             I, DT, CurLoop, SafetyInfo, ORE,
             CurLoop->getLoopPreheader()->getTerminator())) {
@@ -1035,6 +1040,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                               Loop *CurLoop, AliasSetTracker *CurAST,
                               MemorySSAUpdater *MSSAU,
                               bool TargetExecutesOncePerLoop,
+                              bool NoOfMemAccTooLarge,
                               OptimizationRemarkEmitter *ORE) {
   // If we don't understand the instruction, bail early.
   if (!isHoistableAndSinkableInst(I))
@@ -1173,9 +1179,28 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
       return true;
     if (!EnableLicmCap) {
       auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
-      if (MSSA->isLiveOnEntryDef(Source) ||
-          !CurLoop->contains(Source->getBlock()))
-        return true;
+      // If there are no clobbering Defs in the loop, we still need to check
+      // for interfering Uses. If there are more accesses than the Promotion
+      // cap, give up, we're not walking a list that long. Otherwise, walk the
+      // list, check each Use if it's optimized to an access outside the loop.
+      // If yes, store is safe to hoist. This is fairly restrictive, but
+      // conservatively correct.
+      // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+      // moving accesses. Can also extend to dominating uses.
+      if ((!MSSA->isLiveOnEntryDef(Source) &&
+           CurLoop->contains(Source->getBlock())) ||
+          NoOfMemAccTooLarge)
+        return false;
+      for (auto *BB : CurLoop->getBlocks())
+        if (auto *Accesses = MSSA->getBlockAccesses(BB))
+          for (const auto &MA : *Accesses)
+            if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+              auto *MD = MU->getDefiningAccess();
+              if (!MSSA->isLiveOnEntryDef(MD) &&
+                  CurLoop->contains(MD->getBlock()))
+                return false;
+            }
+      return true;
     }
     return false;
   }
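For orientation, this is roughly how MemorySSA views the loop from the interference sketch above (and the @neg_ref2 test added below). The annotations are hand-written in the style of the MemorySSA printer, and the access numbering is illustrative, not the output of a real run. The load is a MemoryUse whose defining access, the loop header's MemoryPhi, lives inside the loop, so the new walk in canSinkOrHoistInst returns false and the store stays put:

; 1 = MemoryDef(liveOnEntry)   <- the store of 198 in %entry
loop:
; 3 = MemoryPhi({entry,1},{backedge,2})
  %iv = phi i32 [0, %entry], [%iv.next, %backedge]
; MemoryUse(3)                 <- defining access %3 is a block inside the loop
  %v = load i32, i32* %loc
; 2 = MemoryDef(3)             <- the store has no clobbering Def inside the loop
  store i32 0, i32* %loc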
@@ -303,7 +303,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
     // No need to check for instruction's operands are loop invariant.
     assert(L.hasLoopInvariantOperands(I) &&
            "Insts in a loop's preheader should have loop invariant operands!");
-    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
+    if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false, false))
       continue;
     if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
       Changed = true;
@@ -1,5 +1,6 @@
-; RUN: opt -S -basicaa -licm %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
+; RUN: opt -S -basicaa -licm %s | FileCheck -check-prefixes=CHECK,AST %s
+; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s

 define void @test(i32* %loc) {
 ; CHECK-LABEL: @test
@@ -46,8 +47,11 @@ exit2:

 define i32* @false_negative_2use(i32* %loc) {
 ; CHECK-LABEL: @false_negative_2use
-; CHECK-LABEL: exit:
-; CHECK: store i32 0, i32* %loc
+; AST-LABEL: exit:
+; AST: store i32 0, i32* %loc
+; MSSA-LABEL: entry:
+; MSSA: store i32 0, i32* %loc
+; MSSA-LABEL: loop:
 entry:
   br label %loop

@@ -119,6 +123,7 @@ exit:
   ret void
 }

+; Hoisting the store is actually valid here, as it dominates the load.
 define void @neg_ref(i32* %loc) {
 ; CHECK-LABEL: @neg_ref
 ; CHECK-LABEL: exit1:
@@ -146,6 +151,35 @@ exit2:
   ret void
 }

+; Hoisting the store here leads to a miscompile.
+define void @neg_ref2(i32* %loc) {
+; CHECK-LABEL: @neg_ref2
+; CHECK-LABEL: exit1:
+; CHECK: store i32 0, i32* %loc
+; CHECK-LABEL: exit2:
+; CHECK: store i32 0, i32* %loc
+entry:
+  store i32 198, i32* %loc
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+  %v = load i32, i32* %loc
+  store i32 0, i32* %loc
+  %earlycnd = icmp eq i32 %v, 198
+  br i1 %earlycnd, label %exit1, label %backedge
+
+backedge:
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit2
+
+exit1:
+  ret void
+exit2:
+  ret void
+}
+
 declare void @modref()

 define void @neg_modref(i32* %loc) {