From 403da6a69abc357066117370a5b80e03594b81a6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 9 Mar 2021 12:07:55 +0100 Subject: [PATCH] Reapply [LICM] Make promotion faster Relative to the previous implementation, this always uses aliasesUnknownInst() instead of aliasesPointer() to correctly handle atomics. The added test case was previously miscompiled. ----- Even when MemorySSA-based LICM is used, an AST is still populated for scalar promotion. As the AST has quadratic complexity, a lot of time is spent in this step despite the existing access count limit. This patch optimizes the identification of promotable stores. The idea here is pretty simple: We're only interested in must-alias mod sets of loop invariant pointers. As such, only populate the AST with loop-invariant loads and stores (anything else is definitely not promotable) and then discard any sets which alias with any of the remaining, definitely non-promotable accesses. If we promoted something, check whether this has made some other accesses loop invariant and thus possible promotion candidates. This is much faster in practice, because we need to perform AA queries for O(NumPromotable^2 + NumPromotable*NumNonPromotable) instead of O(NumTotal^2), and NumPromotable tends to be small. Additionally, promotable accesses have loop invariant pointers, for which AA is cheaper. This has a signicant positive compile-time impact. We save ~1.8% geomean on CTMark at O3, with 6% on lencod in particular and 25% on individual files. Conceptually, this change is NFC, but may not be so in practice, because the AST is only an approximation, and can produce different results depending on the order in which accesses are added. However, there is at least no impact on the number of promotions (licm.NumPromoted) in test-suite O3 configuration with this change. Differential Revision: https://reviews.llvm.org/D89264 --- llvm/include/llvm/Analysis/AliasSetTracker.h | 7 - llvm/lib/Analysis/AliasSetTracker.cpp | 11 +- llvm/lib/Transforms/Scalar/LICM.cpp | 141 ++++++++++++++----- llvm/test/Transforms/LICM/promote-atomic.ll | 34 +++++ 4 files changed, 143 insertions(+), 50 deletions(-) create mode 100644 llvm/test/Transforms/LICM/promote-atomic.ll diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index b27fd5aa92a7..ee6c371435e7 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -38,8 +38,6 @@ class AAResults; class AliasSetTracker; class BasicBlock; class LoadInst; -class Loop; -class MemorySSA; class AnyMemSetInst; class AnyMemTransferInst; class raw_ostream; @@ -343,8 +341,6 @@ class AliasSetTracker { struct ASTCallbackVHDenseMapInfo : public DenseMapInfo {}; AAResults &AA; - MemorySSA *MSSA = nullptr; - Loop *L = nullptr; ilist AliasSets; using PointerMapType = DenseMapblocks()) - if (auto *Accesses = MSSA->getBlockAccesses(BB)) - for (auto &Access : *Accesses) - if (auto *MUD = dyn_cast(&Access)) - add(MUD->getMemoryInst()); -} - // deleteValue method - This method is used to remove a pointer value from the // AliasSetTracker entirely. It should be used when an instruction is deleted // from the program to update the AST. If you don't use this, you would have diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index d76f7a2ce9cc..2875b301bbf1 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -185,6 +185,12 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE); +static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, + function_ref Fn); +static SmallVector, 0> +collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L, + SmallVectorImpl &MaybePromotable); + namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, @@ -203,9 +209,6 @@ private: std::unique_ptr collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA); - std::unique_ptr - collectAliasInfoForLoopWithMSSA(Loop *L, AAResults *AA, - MemorySSAUpdater *MSSAU); }; struct LegacyLICMPass : public LoopPass { @@ -446,31 +449,48 @@ bool LoopInvariantCodeMotion::runOnLoop( PredIteratorCache PIC; bool Promoted = false; + if (CurAST.get()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSet &AS : *CurAST) { + // We can promote this alias set if it has a store, if it is a "Must" + // alias set, if the pointer is loop invariant, and if we are not + // eliminating any volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + !L->isLoopInvariant(AS.begin()->getValue())) + continue; - // Build an AST using MSSA. - if (!CurAST.get()) - CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get()); + assert( + !AS.empty() && + "Must alias set should have at least one pointer element in it!"); - // Loop over all of the alias sets in the tracker object. - for (AliasSet &AS : *CurAST) { - // We can promote this alias set if it has a store, if it is a "Must" - // alias set, if the pointer is loop invariant, and if we are not - // eliminating any volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - !L->isLoopInvariant(AS.begin()->getValue())) - continue; + SmallSetVector PointerMustAliases; + for (const auto &ASI : AS) + PointerMustAliases.insert(ASI.getValue()); - assert( - !AS.empty() && - "Must alias set should have at least one pointer element in it!"); + Promoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); + } + } else { + SmallVector MaybePromotable; + foreachMemoryAccess(MSSA, L, [&](Instruction *I) { + MaybePromotable.push_back(I); + }); - SmallSetVector PointerMustAliases; - for (const auto &ASI : AS) - PointerMustAliases.insert(ASI.getValue()); - - Promoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); + // Promoting one set of accesses may make the pointers for another set + // loop invariant, so run this in a loop (with the MaybePromotable set + // decreasing in size over time). + bool LocalPromoted; + do { + LocalPromoted = false; + for (const SmallSetVector &PointerMustAliases : + collectPromotionCandidates(MSSA, AA, L, MaybePromotable)) { + LocalPromoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, + LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE); + } + Promoted |= LocalPromoted; + } while (LocalPromoted); } // Once we have promoted values across the loop body we have to @@ -2233,6 +2253,70 @@ bool llvm::promoteLoopAccessesToScalars( return true; } +static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, + function_ref Fn) { + for (const BasicBlock *BB : L->blocks()) + if (const auto *Accesses = MSSA->getBlockAccesses(BB)) + for (const auto &Access : *Accesses) + if (const auto *MUD = dyn_cast(&Access)) + Fn(MUD->getMemoryInst()); +} + +static SmallVector, 0> +collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L, + SmallVectorImpl &MaybePromotable) { + AliasSetTracker AST(*AA); + + auto IsPotentiallyPromotable = [L](const Instruction *I) { + if (const auto *SI = dyn_cast(I)) + return L->isLoopInvariant(SI->getPointerOperand()); + if (const auto *LI = dyn_cast(I)) + return L->isLoopInvariant(LI->getPointerOperand()); + return false; + }; + + // Populate AST with potentially promotable accesses and remove them from + // MaybePromotable, so they will not be checked again on the next iteration. + SmallPtrSet AttemptingPromotion; + llvm::erase_if(MaybePromotable, [&](Instruction *I) { + if (IsPotentiallyPromotable(I)) { + AttemptingPromotion.insert(I); + AST.add(I); + return true; + } + return false; + }); + + // We're only interested in must-alias sets that contain a mod. + SmallVector Sets; + for (AliasSet &AS : AST) + if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias()) + Sets.push_back(&AS); + + if (Sets.empty()) + return {}; // Nothing to promote... + + // Discard any sets for which there is an aliasing non-promotable access. + foreachMemoryAccess(MSSA, L, [&](Instruction *I) { + if (AttemptingPromotion.contains(I)) + return; + + llvm::erase_if(Sets, [&](const AliasSet *AS) { + return AS->aliasesUnknownInst(I, *AA); + }); + }); + + SmallVector, 0> Result; + for (const AliasSet *Set : Sets) { + SmallSetVector PointerMustAliases; + for (const auto &ASI : *Set) + PointerMustAliases.insert(ASI.getValue()); + Result.push_back(std::move(PointerMustAliases)); + } + + return Result; +} + /// Returns an owning pointer to an alias set which incorporates aliasing info /// from L and all subloops of L. std::unique_ptr @@ -2253,15 +2337,6 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, return CurAST; } -std::unique_ptr -LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA( - Loop *L, AAResults *AA, MemorySSAUpdater *MSSAU) { - auto *MSSA = MSSAU->getMemorySSA(); - auto CurAST = std::make_unique(*AA, MSSA, L); - CurAST->addAllInstructionsInLoopUsingMSSA(); - return CurAST; -} - static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA) { diff --git a/llvm/test/Transforms/LICM/promote-atomic.ll b/llvm/test/Transforms/LICM/promote-atomic.ll new file mode 100644 index 000000000000..86107d738cea --- /dev/null +++ b/llvm/test/Transforms/LICM/promote-atomic.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -licm < %s | FileCheck %s + +%class.LiveThread = type { i64, %class.LiveThread* } + +@globallive = external dso_local global i64, align 8 + +; The store should not be sunk (via scalar promotion) past the cmpxchg. + +define void @test(%class.LiveThread* %live_thread) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[NEXT_UNPROCESSED_:%.*]] = getelementptr inbounds [[CLASS_LIVETHREAD:%.*]], %class.LiveThread* [[LIVE_THREAD:%.*]], i64 0, i32 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: store %class.LiveThread* undef, %class.LiveThread** [[NEXT_UNPROCESSED_]], align 8 +; CHECK-NEXT: [[XCHG:%.*]] = cmpxchg weak i64* @globallive, i64 undef, i64 undef release monotonic, align 8 +; CHECK-NEXT: [[DONE:%.*]] = extractvalue { i64, i1 } [[XCHG]], 1 +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + %next_unprocessed_ = getelementptr inbounds %class.LiveThread, %class.LiveThread* %live_thread, i64 0, i32 1 + br label %loop + +loop: + store %class.LiveThread* undef, %class.LiveThread** %next_unprocessed_, align 8 + %xchg = cmpxchg weak i64* @globallive, i64 undef, i64 undef release monotonic, align 8 + %done = extractvalue { i64, i1 } %xchg, 1 + br i1 %done, label %exit, label %loop + +exit: + ret void +} +