Use MemorySSA in LICM to do sinking and hoisting.

Summary:
Step 2 in using MemorySSA in LICM:
Use MemorySSA in LICM to do sinking and hoisting, all under "EnableMSSALoopDependency" flag.
Promotion is disabled.

Enable the flag in the LICM sink/hoist tests to test the correctness of this change. One test
that relied on promotion was moved, so that all sinking tests can be run with the flag enabled.

Reviewers: sanjoy, davide, gberry, george.burgess.iv

Subscribers: llvm-commits, Prazek

Differential Revision: https://reviews.llvm.org/D40375

llvm-svn: 350879
This commit is contained in:
Alina Sbirlea 2019-01-10 19:29:04 +00:00
parent 54c04301b7
commit cae12edaaa
18 changed files with 351 additions and 153 deletions

View File

@ -35,6 +35,7 @@
#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/BasicBlock.h" #include "llvm/IR/BasicBlock.h"

View File

@ -41,6 +41,7 @@ class BasicBlock;
class DataLayout; class DataLayout;
class Loop; class Loop;
class LoopInfo; class LoopInfo;
class MemorySSAUpdater;
class OptimizationRemarkEmitter; class OptimizationRemarkEmitter;
class PredicatedScalarEvolution; class PredicatedScalarEvolution;
class PredIteratorCache; class PredIteratorCache;
@ -109,7 +110,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
/// arguments. Diagnostics are emitted via \p ORE. It returns changed status. /// arguments. Diagnostics are emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, TargetTransformInfo *, Loop *, TargetLibraryInfo *, TargetTransformInfo *, Loop *,
AliasSetTracker *, ICFLoopSafetyInfo *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *ORE); OptimizationRemarkEmitter *ORE);
/// Walk the specified region of the CFG (defined by all blocks /// Walk the specified region of the CFG (defined by all blocks
@ -122,7 +123,8 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
/// ORE. It returns changed status. /// ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *, TargetLibraryInfo *, Loop *, AliasSetTracker *,
ICFLoopSafetyInfo *, OptimizationRemarkEmitter *ORE); MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *ORE);
/// This function deletes dead loops. The caller of this function needs to /// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead. /// guarantee that the loop is in fact dead.
@ -274,7 +276,7 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
/// If \p ORE is set use it to emit optimization remarks. /// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST, Loop *CurLoop, AliasSetTracker *CurAST,
bool TargetExecutesOncePerLoop, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
OptimizationRemarkEmitter *ORE = nullptr); OptimizationRemarkEmitter *ORE = nullptr);
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.

View File

@ -46,11 +46,11 @@
#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h" #include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h" #include "llvm/IR/Constants.h"
@ -69,6 +69,7 @@
#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm> #include <algorithm>
@ -106,16 +107,29 @@ static cl::opt<int>
LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
cl::desc("How many instruction to cross product using AA")); cl::desc("How many instruction to cross product using AA"));
// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
// pathological cases, in exchange for faster compile. This is to be removed
// if MemorySSA starts to address the same issue. This flag applies only when
// LICM uses MemorySSA instead of AliasSetTracker. When the flag is disabled
// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
// gets perfect accuracy. When flag is enabled, LICM will call into MemorySSA's
// getDefiningAccess, which may not be precise, since optimizeUses is capped.
static cl::opt<bool> EnableLicmCap(
"enable-licm-cap", cl::init(false), cl::Hidden,
cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
"pathological cases, in exchange for faster compile"));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo, const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop); TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE); MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, bool FreeInLoop); MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst, static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT, const DominatorTree *DT,
const Loop *CurLoop, const Loop *CurLoop,
@ -125,14 +139,14 @@ static bool isSafeToExecuteUnconditionally(Instruction &Inst,
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop, AliasSetTracker *CurAST, Loop *CurLoop,
AliasAnalysis *AA); AliasAnalysis *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
static Instruction * Loop *CurLoop);
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, static Instruction *CloneInstructionInExitBlock(
const LoopInfo *LI, Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo); const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
AliasSetTracker *AST); AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest, static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo); ICFLoopSafetyInfo &SafetyInfo);
@ -194,8 +208,10 @@ struct LegacyLICMPass : public LoopPass {
AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>();
if (EnableMSSALoopDependency) if (EnableMSSALoopDependency) {
AU.addRequired<MemorySSAWrapperPass>(); AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU); getLoopAnalysisUsage(AU);
} }
@ -275,7 +291,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
std::unique_ptr<AliasSetTracker> CurAST = collectAliasInfoForLoop(L, LI, AA); std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
} else {
LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
MSSAU = make_unique<MemorySSAUpdater>(MSSA);
}
// Get the preheader block to move instructions into... // Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Preheader = L->getLoopPreheader();
@ -296,10 +320,10 @@ bool LoopInvariantCodeMotion::runOnLoop(
// //
if (L->hasDedicatedExits()) if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST.get(), &SafetyInfo, ORE); CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
if (Preheader) if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST.get(), &SafetyInfo, ORE); CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
// Now that all loop invariants have been removed from the loop, promote any // Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can. // memory references to scalars that we can.
@ -328,27 +352,30 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool Promoted = false; bool Promoted = false;
// Loop over all of the alias sets in the tracker object. if (CurAST.get()) {
for (AliasSet &AS : *CurAST) { // Loop over all of the alias sets in the tracker object.
// We can promote this alias set if it has a store, if it is a "Must" for (AliasSet &AS : *CurAST) {
// alias set, if the pointer is loop invariant, and if we are not // We can promote this alias set if it has a store, if it is a "Must"
// eliminating any volatile loads or stores. // alias set, if the pointer is loop invariant, and if we are not
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || // eliminating any volatile loads or stores.
!L->isLoopInvariant(AS.begin()->getValue())) if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
continue; !L->isLoopInvariant(AS.begin()->getValue()))
continue;
assert( assert(
!AS.empty() && !AS.empty() &&
"Must alias set should have at least one pointer element in it!"); "Must alias set should have at least one pointer element in it!");
SmallSetVector<Value *, 8> PointerMustAliases; SmallSetVector<Value *, 8> PointerMustAliases;
for (const auto &ASI : AS) for (const auto &ASI : AS)
PointerMustAliases.insert(ASI.getValue()); PointerMustAliases.insert(ASI.getValue());
Promoted |= promoteLoopAccessesToScalars( Promoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
CurAST.get(), &SafetyInfo, ORE); CurAST.get(), &SafetyInfo, ORE);
}
} }
// FIXME: Promotion initially disabled when using MemorySSA.
// Once we have promoted values across the loop body we have to // Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined // recursively reform LCSSA as any nested loop may now have values defined
@ -372,9 +399,12 @@ bool LoopInvariantCodeMotion::runOnLoop(
// If this loop is nested inside of another one, save the alias information // If this loop is nested inside of another one, save the alias information
// for when we process the outer loop. // for when we process the outer loop.
if (L->getParentLoop() && !DeleteAST) if (CurAST.get() && L->getParentLoop() && !DeleteAST)
LoopToAliasSetMap[L] = std::move(CurAST); LoopToAliasSetMap[L] = std::move(CurAST);
if (MSSAU.get() && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
if (Changed && SE) if (Changed && SE)
SE->forgetLoopDispositions(L); SE->forgetLoopDispositions(L);
return Changed; return Changed;
@ -388,13 +418,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, DominatorTree *DT, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, Loop *CurLoop, TargetTransformInfo *TTI, Loop *CurLoop,
AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) { OptimizationRemarkEmitter *ORE) {
// Verify inputs. // Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && CurAST && SafetyInfo != nullptr && CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion"); "Unexpected input to sinkRegion.");
assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
"Either AliasSetTracker or MemorySSA should be initialized.");
// We want to visit children before parents. We will enqueue all the parents // We want to visit children before parents. We will enqueue all the parents
// before their children in the worklist and process the worklist in reverse // before their children in the worklist and process the worklist in reverse
@ -418,7 +451,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
salvageDebugInfo(I); salvageDebugInfo(I);
++II; ++II;
eraseInstruction(I, *SafetyInfo, CurAST); eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
Changed = true; Changed = true;
continue; continue;
} }
@ -430,18 +463,20 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// //
bool FreeInLoop = false; bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
!I.mayHaveSideEffects()) { !I.mayHaveSideEffects()) {
if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
if (!FreeInLoop) { if (!FreeInLoop) {
++II; ++II;
eraseInstruction(I, *SafetyInfo, CurAST); eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
} }
Changed = true; Changed = true;
} }
} }
} }
} }
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed; return Changed;
} }
@ -458,6 +493,7 @@ private:
LoopInfo *LI; LoopInfo *LI;
DominatorTree *DT; DominatorTree *DT;
Loop *CurLoop; Loop *CurLoop;
MemorySSAUpdater *MSSAU;
// A map of blocks in the loop to the block their instructions will be hoisted // A map of blocks in the loop to the block their instructions will be hoisted
// to. // to.
@ -468,8 +504,9 @@ private:
DenseMap<BranchInst *, BasicBlock *> HoistableBranches; DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
public: public:
ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop) ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
: LI(LI), DT(DT), CurLoop(CurLoop) {} MemorySSAUpdater *MSSAU)
: LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
void registerPossiblyHoistableBranch(BranchInst *BI) { void registerPossiblyHoistableBranch(BranchInst *BI) {
// We can only hoist conditional branches with loop invariant operands. // We can only hoist conditional branches with loop invariant operands.
@ -644,6 +681,9 @@ public:
if (HoistTarget == InitialPreheader) { if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader. // Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
if (MSSAU)
MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
// The new preheader dominates the loop header. // The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader()); DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
@ -674,14 +714,17 @@ public:
/// ///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) { OptimizationRemarkEmitter *ORE) {
// Verify inputs. // Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion"); "Unexpected input to hoistRegion.");
assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
"Either AliasSetTracker or MemorySSA should be initialized.");
ControlFlowHoister CFH(LI, DT, CurLoop); ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
// Keep track of instructions that have been hoisted, as they may need to be // Keep track of instructions that have been hoisted, as they may need to be
// re-hoisted if they end up not dominating all of their uses. // re-hoisted if they end up not dominating all of their uses.
@ -708,10 +751,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) { &I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n'); << '\n');
CurAST->copyValue(&I, C); if (CurAST)
CurAST->copyValue(&I, C);
// FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
I.replaceAllUsesWith(C); I.replaceAllUsesWith(C);
if (isInstructionTriviallyDead(&I, TLI)) if (isInstructionTriviallyDead(&I, TLI))
eraseInstruction(I, *SafetyInfo, CurAST); eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
Changed = true; Changed = true;
continue; continue;
} }
@ -723,11 +768,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header // and we have accurately duplicated the control flow from the loop header
// to that block. // to that block.
if (CurLoop->hasLoopInvariantOperands(&I) && if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
isSafeToExecuteUnconditionally( isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE, I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) { CurLoop->getLoopPreheader()->getTerminator())) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE); hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, ORE);
HoistedInstructions.push_back(&I); HoistedInstructions.push_back(&I);
Changed = true; Changed = true;
continue; continue;
@ -751,10 +797,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
SafetyInfo->insertInstructionTo(Product, I.getParent()); SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I); Product->insertAfter(&I);
I.replaceAllUsesWith(Product); I.replaceAllUsesWith(Product);
eraseInstruction(I, *SafetyInfo, CurAST); eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
SafetyInfo, ORE); SafetyInfo, MSSAU, ORE);
HoistedInstructions.push_back(ReciprocalDivisor); HoistedInstructions.push_back(ReciprocalDivisor);
Changed = true; Changed = true;
continue; continue;
@ -767,7 +813,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurLoop->hasLoopInvariantOperands(&I) && CurLoop->hasLoopInvariantOperands(&I) &&
SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) { SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE); hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, ORE);
HoistedInstructions.push_back(&I); HoistedInstructions.push_back(&I);
Changed = true; Changed = true;
continue; continue;
@ -781,7 +828,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
PN->setIncomingBlock( PN->setIncomingBlock(
i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
ORE); MSSAU, ORE);
assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
Changed = true; Changed = true;
continue; continue;
@ -824,8 +871,11 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
} }
} }
} }
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Now that we've finished hoisting make sure that LI and DT are still valid. // Now that we've finished hoisting make sure that LI and DT are still
// valid.
#ifndef NDEBUG #ifndef NDEBUG
if (Changed) { if (Changed) {
assert(DT->verify(DominatorTree::VerificationLevel::Fast) && assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
@ -905,25 +955,53 @@ bool isHoistableAndSinkableInst(Instruction &I) {
isa<ExtractValueInst>(I) || isa<InsertValueInst>(I)); isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
} }
/// Return true if all of the alias sets within this AST are known not to /// Return true if all of the alias sets within this AST are known not to
/// contain a Mod. /// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
bool isReadOnly(AliasSetTracker *CurAST) { bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
for (AliasSet &AS : *CurAST) { const Loop *L) {
if (!AS.isForwardingAliasSet() && AS.isMod()) { if (CurAST) {
return false; for (AliasSet &AS : *CurAST) {
if (!AS.isForwardingAliasSet() && AS.isMod()) {
return false;
}
} }
return true;
} else { /*MSSAU*/
for (auto *BB : L->getBlocks())
if (MSSAU->getMemorySSA()->getBlockDefs(BB))
return false;
return true;
} }
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
const MemorySSAUpdater *MSSAU) {
for (auto *BB : L->getBlocks())
if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
for (const auto &Acc : *Accs) {
if (isa<MemoryPhi>(&Acc))
continue;
const auto *MUD = cast<MemoryUseOrDef>(&Acc);
if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
return false;
}
}
return true; return true;
} }
} }
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST, Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU,
bool TargetExecutesOncePerLoop, bool TargetExecutesOncePerLoop,
OptimizationRemarkEmitter *ORE) { OptimizationRemarkEmitter *ORE) {
// If we don't understand the instruction, bail early. // If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I)) if (!isHoistableAndSinkableInst(I))
return false; return false;
MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
// Loads have extra constraints we have to verify before we can hoist them. // Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered()) if (!LI->isUnordered())
@ -943,8 +1021,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (isLoadInvariantInLoop(LI, DT, CurLoop)) if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true; return true;
bool Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), bool Invalidated;
CurAST, CurLoop, AA); if (CurAST)
Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
// Check loop-invariant address because this may also be a sinkable load // Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant. // whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@ -969,7 +1052,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (match(CI, m_Intrinsic<Intrinsic::assume>())) if (match(CI, m_Intrinsic<Intrinsic::assume>()))
// Assumes don't actually alias anything or throw // Assumes don't actually alias anything or throw
return true; return true;
// Handle simple cases by querying alias analysis. // Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory) if (Behavior == FMRB_DoesNotAccessMemory)
@ -981,17 +1064,24 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
// TODO: expand to writeable arguments // TODO: expand to writeable arguments
for (Value *Op : CI->arg_operands()) for (Value *Op : CI->arg_operands())
if (Op->getType()->isPointerTy() && if (Op->getType()->isPointerTy()) {
pointerInvalidatedByLoop( bool Invalidated;
if (CurAST)
Invalidated = pointerInvalidatedByLoop(
MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()), MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
CurAST, CurLoop, AA)) CurAST, CurLoop, AA);
return false; else
Invalidated = pointerInvalidatedByLoopWithMSSA(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
if (Invalidated)
return false;
}
return true; return true;
} }
// If this call only reads from memory and there are no writes to memory // If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate. // in the loop, we can hoist or sink the call as appropriate.
if (isReadOnly(CurAST)) if (isReadOnly(CurAST, MSSAU, CurLoop))
return true; return true;
} }
@ -1002,18 +1092,21 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else if (auto *FI = dyn_cast<FenceInst>(&I)) { } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
// Fences alias (most) everything to provide ordering. For the moment, // Fences alias (most) everything to provide ordering. For the moment,
// just give up if there are any other memory operations in the loop. // just give up if there are any other memory operations in the loop.
auto Begin = CurAST->begin(); if (CurAST) {
assert(Begin != CurAST->end() && "must contain FI"); auto Begin = CurAST->begin();
if (std::next(Begin) != CurAST->end()) assert(Begin != CurAST->end() && "must contain FI");
// constant memory for instance, TODO: handle better if (std::next(Begin) != CurAST->end())
return false; // constant memory for instance, TODO: handle better
auto *UniqueI = Begin->getUniqueInstruction(); return false;
if (!UniqueI) auto *UniqueI = Begin->getUniqueInstruction();
// other memory op, give up if (!UniqueI)
return false; // other memory op, give up
(void)FI; //suppress unused variable warning return false;
assert(UniqueI == FI && "AS must contain FI"); (void)FI; // suppress unused variable warning
return true; assert(UniqueI == FI && "AS must contain FI");
return true;
} else // MSSAU
return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
} else if (auto *SI = dyn_cast<StoreInst>(&I)) { } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isUnordered()) if (!SI->isUnordered())
return false; // Don't sink/hoist volatile or ordered atomic store! return false; // Don't sink/hoist volatile or ordered atomic store!
@ -1023,17 +1116,29 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// load store promotion instead. TODO: We can extend this to cases where // load store promotion instead. TODO: We can extend this to cases where
// there is exactly one write to the location and that write dominates an // there is exactly one write to the location and that write dominates an
// arbitrary number of reads in the loop. // arbitrary number of reads in the loop.
auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); if (CurAST) {
auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
if (AS.isRef() || !AS.isMustAlias()) if (AS.isRef() || !AS.isMustAlias())
// Quick exit test, handled by the full path below as well. // Quick exit test, handled by the full path below as well.
return false;
auto *UniqueI = AS.getUniqueInstruction();
if (!UniqueI)
// other memory op, give up
return false;
assert(UniqueI == SI && "AS must contain SI");
return true;
} else { // MSSAU
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
if (!EnableLicmCap) {
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
if (MSSA->isLiveOnEntryDef(Source) ||
!CurLoop->contains(Source->getBlock()))
return true;
}
return false; return false;
auto *UniqueI = AS.getUniqueInstruction(); }
if (!UniqueI)
// other memory op, give up
return false;
assert(UniqueI == SI && "AS must contain SI");
return true;
} }
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing"); assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@ -1117,10 +1222,9 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
return true; return true;
} }
static Instruction * static Instruction *CloneInstructionInExitBlock(
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
const LoopSafetyInfo *SafetyInfo) {
Instruction *New; Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) { if (auto *CI = dyn_cast<CallInst>(&I)) {
const auto &BlockColors = SafetyInfo->getBlockColors(); const auto &BlockColors = SafetyInfo->getBlockColors();
@ -1156,6 +1260,21 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
if (!I.getName().empty()) if (!I.getName().empty())
New->setName(I.getName() + ".le"); New->setName(I.getName() + ".le");
MemoryAccess *OldMemAcc;
if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
// Create a new MemoryAccess and let MemorySSA set its defining access.
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
New, nullptr, New->getParent(), MemorySSA::Beginning);
if (NewMemAcc) {
if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
MSSAU->insertDef(MemDef, /*RenameUses=*/true);
else {
auto *MemUse = cast<MemoryUse>(NewMemAcc);
MSSAU->insertUse(MemUse);
}
}
}
// Build LCSSA PHI nodes for any in-loop operands. Note that this is // Build LCSSA PHI nodes for any in-loop operands. Note that this is
// particularly cheap because we can rip off the PHI node that we're // particularly cheap because we can rip off the PHI node that we're
// replacing for the number and blocks of the predecessors. // replacing for the number and blocks of the predecessors.
@ -1179,9 +1298,11 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
} }
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
AliasSetTracker *AST) { AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
if (AST) if (AST)
AST->deleteValue(&I); AST->deleteValue(&I);
if (MSSAU)
MSSAU->removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I); SafetyInfo.removeInstruction(&I);
I.eraseFromParent(); I.eraseFromParent();
} }
@ -1196,7 +1317,8 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
static Instruction *sinkThroughTriviallyReplaceablePHI( static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI, PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies, SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
MemorySSAUpdater *MSSAU) {
assert(isTriviallyReplaceablePHI(*TPN, *I) && assert(isTriviallyReplaceablePHI(*TPN, *I) &&
"Expect only trivially replaceable PHI"); "Expect only trivially replaceable PHI");
BasicBlock *ExitBlock = TPN->getParent(); BasicBlock *ExitBlock = TPN->getParent();
@ -1205,8 +1327,8 @@ static Instruction *sinkThroughTriviallyReplaceablePHI(
if (It != SunkCopies.end()) if (It != SunkCopies.end())
New = It->second; New = It->second;
else else
New = SunkCopies[ExitBlock] = New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
return New; return New;
} }
@ -1230,7 +1352,8 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
LoopInfo *LI, const Loop *CurLoop, LoopInfo *LI, const Loop *CurLoop,
LoopSafetyInfo *SafetyInfo) { LoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG #ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks; SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks); CurLoop->getUniqueExitBlocks(ExitBlocks);
@ -1280,7 +1403,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
"Expect all predecessors are in the loop"); "Expect all predecessors are in the loop");
if (PN->getBasicBlockIndex(PredBB) >= 0) { if (PN->getBasicBlockIndex(PredBB) >= 0) {
BasicBlock *NewPred = SplitBlockPredecessors( BasicBlock *NewPred = SplitBlockPredecessors(
ExitBB, PredBB, ".split.loop.exit", DT, LI, nullptr, true); ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
// Since we do not allow splitting EH-block with BlockColors in // Since we do not allow splitting EH-block with BlockColors in
// canSplitPredecessors(), we can simply assign predecessor's color to // canSplitPredecessors(), we can simply assign predecessor's color to
// the new block. // the new block.
@ -1301,7 +1424,8 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
/// ///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, bool FreeInLoop) { MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
bool FreeInLoop) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() { ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@ -1353,7 +1477,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// Split predecessors of the PHI so that we can make users trivially // Split predecessors of the PHI so that we can make users trivially
// replaceable. // replaceable.
splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
// Should rebuild the iterators, as they may be invalidated by // Should rebuild the iterators, as they may be invalidated by
// splitPredecessorsOfLoopExit(). // splitPredecessorsOfLoopExit().
@ -1388,10 +1512,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
assert(ExitBlockSet.count(PN->getParent()) && assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!"); "The LCSSA PHI is not in an exit block!");
// The PHI must be trivially replaceable. // The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies, Instruction *New = sinkThroughTriviallyReplaceablePHI(
SafetyInfo, CurLoop); PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New); PN->replaceAllUsesWith(New);
eraseInstruction(*PN, *SafetyInfo, nullptr); eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
Changed = true; Changed = true;
} }
return Changed; return Changed;
@ -1402,7 +1526,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
/// ///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) { MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
<< "\n"); << "\n");
ORE->emit([&]() { ORE->emit([&]() {
@ -1427,6 +1551,13 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
else else
// Move the new node to the destination block, before its terminator. // Move the new node to the destination block, before its terminator.
moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo); moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
if (MSSAU) {
// If moving, I just moved a load or store, so update MemorySSA.
MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU->getMemorySSA()->getMemoryAccess(&I));
if (OldMemAcc)
MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
}
// Do not retain debug locations when we are moving instructions to different // Do not retain debug locations when we are moving instructions to different
// basic blocks, because we want to avoid jumpy line tables. Calls, however, // basic blocks, because we want to avoid jumpy line tables. Calls, however,
@ -1831,7 +1962,7 @@ bool llvm::promoteLoopAccessesToScalars(
// If the SSAUpdater didn't use the load in the preheader, just zap it now. // If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty()) if (PreheaderLoad->use_empty())
eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST); eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
return true; return true;
} }
@ -1961,6 +2092,18 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
return false; return false;
} }
/// Report whether the MemorySSA access associated with \p MU lies inside
/// \p CurLoop.
///
/// The access examined is MU's defining access when EnableLicmCap is set, and
/// otherwise the clobbering access returned by MSSA's skip-self walker.
///
/// \returns true only if that access is not the live-on-entry definition and
/// its block is contained in \p CurLoop; false otherwise.
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
                                             Loop *CurLoop) {
  // See declaration of EnableLicmCap for usage details.
  MemoryAccess *Origin =
      EnableLicmCap
          ? MU->getDefiningAccess()
          : MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);

  // The live-on-entry definition never counts as an in-loop access.
  if (MSSA->isLiveOnEntryDef(Origin))
    return false;
  return CurLoop->contains(Origin->getBlock());
}
/// Little predicate that returns true if the specified basic block is in /// Little predicate that returns true if the specified basic block is in
/// a subloop of the current one, not the current one itself. /// a subloop of the current one, not the current one itself.
/// ///

View File

@ -304,7 +304,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// No need to check for instruction's operands are loop invariant. // No need to check for instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(I) && assert(L.hasLoopInvariantOperands(I) &&
"Insts in a loop's preheader should have loop invariant operands!"); "Insts in a loop's preheader should have loop invariant operands!");
if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, false)) if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
continue; continue;
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
Changed = true; Changed = true;

View File

@ -2,6 +2,7 @@
; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2 ; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2
; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2 ; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2
; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 -enable-mssa-loop-dependency=true -verify-memoryssa %s | FileCheck %s --check-prefix=ALIAS-N2
declare i32 @foo() readonly argmemonly nounwind declare i32 @foo() readonly argmemonly nounwind
declare i32 @foo2() readonly nounwind declare i32 @foo2() readonly nounwind
@ -11,6 +12,9 @@ define void @test(i32* %loc) {
; CHECK-LABEL: @test ; CHECK-LABEL: @test
; CHECK: @foo ; CHECK: @foo
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; ALIAS-N2-LABEL: @test
; ALIAS-N2: @foo
; ALIAS-N2-LABEL: loop:
br label %loop br label %loop
loop: loop:
@ -24,6 +28,9 @@ define void @test_neg(i32* %loc) {
; CHECK-LABEL: @test_neg ; CHECK-LABEL: @test_neg
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; CHECK: @foo ; CHECK: @foo
; ALIAS-N2-LABEL: @test_neg
; ALIAS-N2-LABEL: loop:
; ALIAS-N2: @foo
br label %loop br label %loop
loop: loop:
@ -36,6 +43,9 @@ define void @test2(i32* noalias %loc, i32* noalias %loc2) {
; CHECK-LABEL: @test2 ; CHECK-LABEL: @test2
; CHECK: @bar ; CHECK: @bar
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; ALIAS-N2-LABEL: @test2
; ALIAS-N2: @bar
; ALIAS-N2-LABEL: loop:
br label %loop br label %loop
loop: loop:
@ -49,6 +59,9 @@ define void @test3(i32* %loc) {
; CHECK-LABEL: @test3 ; CHECK-LABEL: @test3
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; CHECK: @bar ; CHECK: @bar
; ALIAS-N2-LABEL: @test3
; ALIAS-N2-LABEL: loop:
; ALIAS-N2: @bar
br label %loop br label %loop
loop: loop:
@ -64,6 +77,9 @@ define void @test4(i32* %loc, i32* %loc2) {
; CHECK-LABEL: @test4 ; CHECK-LABEL: @test4
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; CHECK: @bar ; CHECK: @bar
; ALIAS-N2-LABEL: @test4
; ALIAS-N2-LABEL: loop:
; ALIAS-N2: @bar
br label %loop br label %loop
loop: loop:
@ -77,6 +93,7 @@ declare i32 @foo_new(i32*) readonly
; we clump foo_new with bar. ; we clump foo_new with bar.
; With the N2 Alias analysis diagnostic tool, we are able to hoist the ; With the N2 Alias analysis diagnostic tool, we are able to hoist the
; argmemonly bar call out of the loop. ; argmemonly bar call out of the loop.
; Using MemorySSA we can also hoist bar.
define void @test5(i32* %loc2, i32* noalias %loc) { define void @test5(i32* %loc2, i32* noalias %loc) {
; ALIAS-N2-LABEL: @test5 ; ALIAS-N2-LABEL: @test5
@ -103,6 +120,10 @@ define void @test6(i32* noalias %loc, i32* noalias %loc2) {
; CHECK: %val = load i32, i32* %loc2 ; CHECK: %val = load i32, i32* %loc2
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; CHECK: @llvm.memcpy ; CHECK: @llvm.memcpy
; ALIAS-N2-LABEL: @test6
; ALIAS-N2: %val = load i32, i32* %loc2
; ALIAS-N2-LABEL: loop:
; ALIAS-N2: @llvm.memcpy
br label %loop br label %loop
loop: loop:
@ -119,6 +140,10 @@ define void @test7(i32* noalias %loc, i32* noalias %loc2) {
; CHECK: %val = load i32, i32* %loc2 ; CHECK: %val = load i32, i32* %loc2
; CHECK-LABEL: loop: ; CHECK-LABEL: loop:
; CHECK: @custom_memcpy ; CHECK: @custom_memcpy
; ALIAS-N2-LABEL: @test7
; ALIAS-N2: %val = load i32, i32* %loc2
; ALIAS-N2-LABEL: loop:
; ALIAS-N2: @custom_memcpy
br label %loop br label %loop
loop: loop:

View File

@ -1,5 +1,6 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,5 +1,6 @@
; RUN: opt < %s -licm -S | FileCheck %s ; RUN: opt < %s -licm -S | FileCheck %s
; RUN: opt < %s -strip-debug -licm -S | FileCheck %s ; RUN: opt < %s -strip-debug -licm -S | FileCheck %s
; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s --check-prefixes=CHECK,MSSA
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
@ -16,7 +17,9 @@ define void @fn1() !dbg !6 {
; CHECK-NEXT: [[_TMP2:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[_TMP2:%.*]] = load i32, i32* @a, align 4
; CHECK-NEXT: [[_TMP3:%.*]] = load i32, i32* @b, align 4 ; CHECK-NEXT: [[_TMP3:%.*]] = load i32, i32* @b, align 4
; CHECK-NEXT: [[_TMP4:%.*]] = sdiv i32 [[_TMP2]], [[_TMP3]] ; CHECK-NEXT: [[_TMP4:%.*]] = sdiv i32 [[_TMP2]], [[_TMP3]]
; MSSA-NEXT: store i32 [[_TMP4:%.*]], i32* @c, align 4
; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]]
br label %bb3 br label %bb3
bb3: ; preds = %bb3, %0 bb3: ; preds = %bb3, %0

View File

@ -1,5 +1,7 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(simplify-cfg,licm)' -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,5 @@
; RUN: opt -licm -S < %s | FileCheck %s ; RUN: opt -licm -S < %s | FileCheck %s
; RUN: opt -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
; Function Attrs: noinline norecurse nounwind readnone ssp uwtable ; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
define zeroext i1 @invariant_denom(double %v) #0 { define zeroext i1 @invariant_denom(double %v) #0 {

View File

@ -1,5 +1,6 @@
; REQUIRES: asserts ; REQUIRES: asserts
; RUN: opt < %s -licm -disable-basicaa -stats -S 2>&1 | grep "1 licm" ; RUN: opt < %s -licm -disable-basicaa -stats -S 2>&1 | grep "1 licm"
; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -disable-basicaa -stats -S 2>&1 | grep "1 licm"
@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1 @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" @"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"

View File

@ -1,5 +1,6 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s ; RUN: opt -S -basicaa -licm < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"

View File

@ -5,6 +5,11 @@
; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=1 -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-ENABLED ; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=1 -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-ENABLED
; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=0 -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-DISABLED ; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=0 -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-DISABLED
; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=1 -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-ENABLED
; Enable the run below once promotion is added, e.g. "store i32 %phi, i32* %p" is promoted to phi.lcssa.
; opt -passes='require<opt-remark-emit>,loop(licm)' -licm-control-flow-hoisting=0 -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s -check-prefixes=CHECK,CHECK-DISABLED
; CHECK-LABEL: @triangle_phi ; CHECK-LABEL: @triangle_phi
define void @triangle_phi(i32 %x, i32* %p) { define void @triangle_phi(i32 %x, i32* %p) {
; CHECK-LABEL: entry: ; CHECK-LABEL: entry:

View File

@ -1,5 +1,6 @@
; RUN: opt -S -licm < %s | FileCheck %s ; RUN: opt -S -licm < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32" target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"

View File

@ -1,5 +1,6 @@
; RUN: opt < %s -licm -S | FileCheck %s ; RUN: opt < %s -licm -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
@X = global i32 0 ; <i32*> [#uses=1] @X = global i32 0 ; <i32*> [#uses=1]

View File

@ -0,0 +1,50 @@
; RUN: opt < %s -basicaa -licm -S | FileCheck %s
; Test moved from sinking.ll, as it tests sinking of a store that alone touches
; a memory location in a loop.
; Store can be sunk out of exit block containing indirectbr instructions after
; D50925. Updated to use an argument instead of undef, due to PR38989.
define void @test12(i32* %ptr) {
; The directives below expect the store to appear in the entry block,
; immediately before the branch to lab4.
; CHECK-LABEL: @test12
; CHECK: store
; CHECK-NEXT: br label %lab4
br label %lab4
lab4:
br label %lab20
; lab5 and lab6 are targets of the indirectbr in lab22 and lead back to
; lab20 and lab4 respectively.
lab5:
br label %lab20
lab6:
br label %lab4
; lab7 is the third indirectbr target; lab7, lab8 and lab10 branch among
; themselves or to lab13, which returns.
lab7:
br i1 undef, label %lab8, label %lab13
lab8:
br i1 undef, label %lab13, label %lab10
lab10:
br label %lab7
lab13:
ret void
lab20:
br label %lab21
; lab21 branches back to itself or on to lab22, and holds the only store in
; this function. No store is expected to remain here in the output.
lab21:
; CHECK: lab21:
; CHECK-NOT: store
; CHECK: br i1 false, label %lab21, label %lab22
store i32 36127957, i32* %ptr, align 4
br i1 undef, label %lab21, label %lab22
; lab22 ends in an indirectbr to lab5, lab6 or lab7; no store is expected
; in this block either.
lab22:
; CHECK: lab22:
; CHECK-NOT: store
; CHECK-NEXT: indirectbr i8* undef
indirectbr i8* undef, [label %lab5, label %lab6, label %lab7]
}

View File

@ -2,6 +2,7 @@
; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK ; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK
; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(licm),loop-sink' \ ; RUN: opt -S < %s -passes='require<opt-remark-emit>,loop(licm),loop-sink' \
; RUN: | FileCheck %s --check-prefix=CHECK-SINK ; RUN: | FileCheck %s --check-prefix=CHECK-SINK
; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM
; Original source code: ; Original source code:
; int g; ; int g;

View File

@ -1,5 +1,7 @@
; RUN: opt < %s -basicaa -licm -S | FileCheck %s ; RUN: opt < %s -basicaa -licm -S | FileCheck %s
; RUN: opt < %s -debugify -basicaa -licm -S | FileCheck %s -check-prefix=DEBUGIFY ; RUN: opt < %s -debugify -basicaa -licm -S | FileCheck %s -check-prefix=DEBUGIFY
; RUN: opt < %s -basicaa -licm -S -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
declare i32 @strlen(i8*) readonly nounwind declare i32 @strlen(i8*) readonly nounwind
@ -358,50 +360,7 @@ exit:
ret i32 %lcssa ret i32 %lcssa
} }
; Can't sink stores out of exit blocks containing indirectbr instructions ; @test12 moved to sink-promote.ll, as it tests sinking and promotion.
; because loop simplify does not create dedicated exits for such blocks. Test
; that by sinking the store from lab21 to lab22, but not further.
define void @test12() {
; CHECK-LABEL: @test12
br label %lab4
lab4:
br label %lab20
lab5:
br label %lab20
lab6:
br label %lab4
lab7:
br i1 undef, label %lab8, label %lab13
lab8:
br i1 undef, label %lab13, label %lab10
lab10:
br label %lab7
lab13:
ret void
lab20:
br label %lab21
lab21:
; CHECK: lab21:
; CHECK-NOT: store
; CHECK: br i1 false, label %lab21, label %lab22
store i32 36127957, i32* undef, align 4
br i1 undef, label %lab21, label %lab22
lab22:
; CHECK: lab22:
; CHECK: store
; CHECK-NEXT: indirectbr i8* undef
indirectbr i8* undef, [label %lab5, label %lab6, label %lab7]
}
; Test that we don't crash when trying to sink stores and there's no preheader ; Test that we don't crash when trying to sink stores and there's no preheader
; available (which is used for creating loads that may be used by the SSA ; available (which is used for creating loads that may be used by the SSA

View File

@ -1,5 +1,6 @@
; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s ; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s
; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s ; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S | FileCheck %s
; RUN: opt -basicaa -sroa -loop-rotate -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
; The objects *p and *q are aliased to each other, but even though *q is ; The objects *p and *q are aliased to each other, but even though *q is
; volatile, *p can be considered invariant in the loop. Check if it is moved ; volatile, *p can be considered invariant in the loop. Check if it is moved
; out of the loop. ; out of the loop.