[LICM/MSSA] Add promotion to scalars by building an AliasSetTracker with MemorySSA.

Summary:
Experimentally we found that promotion to scalars carries less benefits
than sinking and hoisting in LICM. When using MemorySSA, we build an
AliasSetTracker on demand in order to reuse the current infrastructure.
We only build it if less than AccessCapForMSSAPromotion exist in the
loop, a cap that is by default set to 250. This value ensures there are
no runtime regressions, and there are small compile time gains for
pathological cases. A much lower value (20) was found to yield a single
regression in the llvm-test-suite and much higher benefits for compile
times. Conservatively we set the current cap to a high value, but we will
explore lowering it when MemorySSA is enabled by default.

Reviewers: sanjoy, chandlerc

Subscribers: nemanjai, jlebar, Prazek, george.burgess.iv, jfb, jsji, llvm-commits

Differential Revision: https://reviews.llvm.org/D56625

llvm-svn: 353339
This commit is contained in:
Alina Sbirlea 2019-02-06 20:25:17 +00:00
parent b3506bf985
commit 6cba96ed52
8 changed files with 161 additions and 53 deletions

View File

@ -36,6 +36,8 @@ namespace llvm {
class AliasSetTracker;
class BasicBlock;
class LoadInst;
class Loop;
class MemorySSA;
class AnyMemSetInst;
class AnyMemTransferInst;
class raw_ostream;
@ -341,6 +343,8 @@ class AliasSetTracker {
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
AliasAnalysis &AA;
MemorySSA *MSSA;
Loop *L;
ilist<AliasSet> AliasSets;
using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
@ -353,6 +357,8 @@ public:
/// Create an empty collection of AliasSets, and use the specified alias
/// analysis object to disambiguate load and store addresses.
explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
explicit AliasSetTracker(AliasAnalysis &aa, MemorySSA *mssa, Loop *l)
: AA(aa), MSSA(mssa), L(l) {}
~AliasSetTracker() { clear(); }
/// These methods are used to add different types of instructions to the alias
@ -377,6 +383,7 @@ public:
void add(BasicBlock &BB); // Add all instructions in basic block
void add(const AliasSetTracker &AST); // Add alias relations from another AST
void addUnknown(Instruction *I);
void addAllInstructionsInLoopUsingMSSA();
void clear();

View File

@ -40,6 +40,7 @@ class BasicBlock;
class DataLayout;
class Loop;
class LoopInfo;
class MemoryAccess;
class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PredicatedScalarEvolution;
@ -148,14 +149,12 @@ void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// Diagnostics is emitted via \p ORE. It returns changed status.
bool promoteLoopAccessesToScalars(const SmallSetVector<Value *, 8> &,
SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &,
PredIteratorCache &, LoopInfo *,
DominatorTree *, const TargetLibraryInfo *,
Loop *, AliasSetTracker *,
ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.

View File

@ -147,7 +147,7 @@ public:
/// Insts is a list of loads and stores to promote, and Name is the basename
/// for the PHIs to insert. After this is complete, the loads and stores are
/// removed from the code.
void run(const SmallVectorImpl<Instruction *> &Insts) const;
void run(const SmallVectorImpl<Instruction *> &Insts);
/// Return true if the specified instruction is in the Inst list.
///
@ -158,7 +158,7 @@ public:
/// This hook is invoked after all the stores are found and inserted as
/// available values.
virtual void doExtraRewritesBeforeFinalDeletion() const {}
virtual void doExtraRewritesBeforeFinalDeletion() {}
/// Clients can choose to implement this to get notified right before
/// a load is RAUW'd another value.

View File

@ -13,7 +13,9 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@ -533,6 +535,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
}
}
void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() {
assert(MSSA && L && "MSSA and L must be available");
for (const BasicBlock *BB : L->blocks())
if (auto *Accesses = MSSA->getBlockAccesses(BB))
for (auto &Access : *Accesses)
if (auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
add(MUD->getMemoryInst());
}
// deleteValue method - This method is used to remove a pointer value from the
// AliasSetTracker entirely. It should be used when an instruction is deleted
// from the program to update the AST. If you don't use this, you would have

View File

@ -186,7 +186,7 @@ public:
SSA.AddAvailableValue(PH, Init);
}
void doExtraRewritesBeforeFinalDeletion() const override {
void doExtraRewritesBeforeFinalDeletion() override {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
Instruction *InsertPos = InsertPts[i];

View File

@ -118,6 +118,16 @@ static cl::opt<bool> EnableLicmCap(
cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
"pathological cases, in exchange for faster compile"));
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
static cl::opt<unsigned> AccessCapForMSSAPromotion(
"max-acc-licm-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
"number of accesses allowed to be present in a loop in order to "
"enable memory promotion."));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
@ -166,6 +176,9 @@ private:
std::unique_ptr<AliasSetTracker>
collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
std::unique_ptr<AliasSetTracker>
collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
MemorySSAUpdater *MSSAU);
};
struct LegacyLICMPass : public LoopPass {
@ -293,12 +306,29 @@ bool LoopInvariantCodeMotion::runOnLoop(
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
bool LocalDisablePromotion = false;
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
} else {
LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
MSSAU = make_unique<MemorySSAUpdater>(MSSA);
unsigned AccessCapCount = 0;
for (auto *BB : L->getBlocks()) {
if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
for (const auto &MA : *Accesses) {
(void)MA;
AccessCapCount++;
if (AccessCapCount > AccessCapForMSSAPromotion) {
LocalDisablePromotion = true;
break;
}
}
}
if (LocalDisablePromotion)
break;
}
}
// Get the preheader block to move instructions into...
@ -332,7 +362,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
// make sure we catch that. An additional load may be generated in the
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
!LocalDisablePromotion) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@ -344,38 +375,45 @@ bool LoopInvariantCodeMotion::runOnLoop(
if (!HasCatchSwitch) {
SmallVector<Instruction *, 8> InsertPts;
SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks)
if (MSSAU)
MSSAInsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
if (MSSAU)
MSSAInsertPts.push_back(nullptr);
}
PredIteratorCache PIC;
bool Promoted = false;
if (CurAST.get()) {
// Loop over all of the alias sets in the tracker object.
for (AliasSet &AS : *CurAST) {
// We can promote this alias set if it has a store, if it is a "Must"
// alias set, if the pointer is loop invariant, and if we are not
// eliminating any volatile loads or stores.
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
!L->isLoopInvariant(AS.begin()->getValue()))
continue;
// Build an AST using MSSA.
if (!CurAST.get())
CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
assert(
!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
// Loop over all of the alias sets in the tracker object.
for (AliasSet &AS : *CurAST) {
// We can promote this alias set if it has a store, if it is a "Must"
// alias set, if the pointer is loop invariant, and if we are not
// eliminating any volatile loads or stores.
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
!L->isLoopInvariant(AS.begin()->getValue()))
continue;
SmallSetVector<Value *, 8> PointerMustAliases;
for (const auto &ASI : AS)
PointerMustAliases.insert(ASI.getValue());
assert(
!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
Promoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
CurAST.get(), &SafetyInfo, ORE);
}
SmallSetVector<Value *, 8> PointerMustAliases;
for (const auto &ASI : AS)
PointerMustAliases.insert(ASI.getValue());
Promoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
}
// FIXME: Promotion initially disabled when using MemorySSA.
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@ -399,7 +437,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
if (CurAST.get() && L->getParentLoop() && !DeleteAST)
if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
LoopToAliasSetMap[L] = std::move(CurAST);
if (MSSAU.get() && VerifyMemorySSA)
@ -1609,8 +1647,10 @@ class LoopPromoter : public LoadAndStorePromoter {
const SmallSetVector<Value *, 8> &PointerMustAliases;
SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
AliasSetTracker &AST;
MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
int Alignment;
@ -1637,15 +1677,16 @@ public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
const SmallSetVector<Value *, 8> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
bool UnorderedAtomic, const AAMDNodes &AATags,
ICFLoopSafetyInfo &SafetyInfo)
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
DebugLoc dl, int alignment, bool UnorderedAtomic,
const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
LI(li), DL(std::move(dl)), Alignment(alignment),
UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
{}
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@ -1657,7 +1698,7 @@ public:
return PointerMustAliases.count(Ptr);
}
void doExtraRewritesBeforeFinalDeletion() const override {
void doExtraRewritesBeforeFinalDeletion() override {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@ -1675,6 +1716,21 @@ public:
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
if (MSSAU) {
MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
MemoryAccess *NewMemAcc;
if (!MSSAInsertPoint) {
NewMemAcc = MSSAU->createMemoryAccessInBB(
NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
} else {
NewMemAcc =
MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
MSSAInsertPts[i] = NewMemAcc;
MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
// FIXME: true for safety, false may still be correct.
}
}
}
@ -1685,6 +1741,8 @@ public:
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
AST.deleteValue(I);
if (MSSAU)
MSSAU->removeMemoryAccess(I);
}
};
@ -1721,10 +1779,11 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
bool llvm::promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &PointerMustAliases,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
CurAST != nullptr && SafetyInfo != nullptr &&
@ -1941,8 +2000,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment,
SawUnorderedAtomic, AATags, *SafetyInfo);
InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@ -1957,13 +2016,21 @@ bool llvm::promoteLoopAccessesToScalars(
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
MemoryAccess *PreheaderLoadMemoryAccess;
if (MSSAU) {
PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
MSSAU->insertUse(NewMemUse);
}
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
return true;
}
@ -2016,6 +2083,15 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
return CurAST;
}
std::unique_ptr<AliasSetTracker>
LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
auto *MSSA = MSSAU->getMemorySSA();
auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
CurAST->addAllInstructionsInLoopUsingMSSA();
return CurAST;
}
/// Simple analysis hook. Clone alias set info.
///
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,

View File

@ -349,8 +349,7 @@ LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
SSA.Initialize(SomeVal->getType(), BaseName);
}
void LoadAndStorePromoter::
run(const SmallVectorImpl<Instruction *> &Insts) const {
void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.

View File

@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \
; RUN: llc -enable-mssa-loop-dependency=false -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \
; RUN: FileCheck %s
; RUN: llc -enable-mssa-loop-dependency=true -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \
; RUN: FileCheck %s --check-prefix=MSSA
; Function Attrs: nounwind
define void @ec_GFp_nistp256_points_mul() {
; CHECK-LABEL: ec_GFp_nistp256_points_mul:
@ -14,6 +16,19 @@ define void @ec_GFp_nistp256_points_mul() {
; CHECK: subfc 5, 5, 7
; CHECK: subfe 5, 4, 6
; CHECK: sradi 5, 5, 63
; With MemorySSA, everything is taken out of the loop by licm.
; Loads and stores to undef are treated as non-aliasing.
; MSSA-LABEL: ec_GFp_nistp256_points_mul
; MSSA: ld 3, 0(3)
; MSSA: li 4, 0
; MSSA: subfic 5, 3, 0
; MSSA: subfze 5, 4
; MSSA: sradi 5, 5, 63
; MSSA: subfc 3, 3, 5
; MSSA: subfe 3, 4, 5
; MSSA: sradi 3, 3, 63
; MSSA: std 3, 0(3)
entry:
br label %fe_cmovznz.exit.i534.i.15
@ -32,3 +47,4 @@ fe_cmovznz.exit.i534.i.15: ; preds = %fe_cmovznz.exit.i53
store i64 %conv4.i60.i.i, i64* undef, align 16
br label %fe_cmovznz.exit.i534.i.15
}