From 88e2bac94d14b1e6d65aea3d3b3d48695ec0835d Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 26 Mar 2018 19:52:54 +0000 Subject: [PATCH] [MemorySSA] Fix exponential compile-time updating MemorySSA. MemorySSAUpdater::getPreviousDefRecursive is a recursive algorithm, for each block, it computes the previous definition for each predecessor, then takes those definitions and combines them. But currently it doesn't remember results which it already computed; this means it can visit the same block multiple times, which adds up to exponential time overall. To fix this, this patch adds a cache. If we computed the result for a block already, we don't need to visit it again because we'll come up with the same result. Well, unless we RAUW a MemoryPHI; in that case, the TrackingVH will be updated automatically. This matches the original source paper for this algorithm. The testcase isn't really a test for the bug, but it adds coverage for the case where tryRemoveTrivialPhi erases an existing PHI node. (It's hard to write a good regression test for a performance issue.) Differential Revision: https://reviews.llvm.org/D44715 llvm-svn: 328577 --- llvm/include/llvm/Analysis/MemorySSAUpdater.h | 9 +++- llvm/lib/Analysis/MemorySSAUpdater.cpp | 43 +++++++++------ .../Transforms/GVNHoist/hoist-simplify-phi.ll | 54 +++++++++++++++++++ 3 files changed, 89 insertions(+), 17 deletions(-) create mode 100644 llvm/test/Transforms/GVNHoist/hoist-simplify-phi.ll diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index b36b2f01dac6..3f4ef0614ca9 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -43,6 +43,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -141,8 +142,12 @@ private: void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where); MemoryAccess *getPreviousDef(MemoryAccess *); MemoryAccess *getPreviousDefInBlock(MemoryAccess *); - MemoryAccess *getPreviousDefFromEnd(BasicBlock *); - MemoryAccess *getPreviousDefRecursive(BasicBlock *); + MemoryAccess * + getPreviousDefFromEnd(BasicBlock *, + DenseMap> &); + MemoryAccess * + getPreviousDefRecursive(BasicBlock *, + DenseMap> &); MemoryAccess *recursePhi(MemoryAccess *Phi); template MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index f5d89f699a5a..0d1e6471e985 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -37,15 +37,26 @@ using namespace llvm; // that there are two or more definitions needing to be merged. // This still will leave non-minimal form in the case of irreducible control // flow, where phi nodes may be in cycles with themselves, but unnecessary. -MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { - // Single predecessor case, just recurse, we can only have one definition. - if (BasicBlock *Pred = BB->getSinglePredecessor()) { - return getPreviousDefFromEnd(Pred); +MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( + BasicBlock *BB, + DenseMap> &CachedPreviousDef) { + // First, do a cache lookup. Without this cache, certain CFG structures + // (like a series of if statements) take exponential time to visit. + auto Cached = CachedPreviousDef.find(BB); + if (Cached != CachedPreviousDef.end()) { + return Cached->second; + } else if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // Single predecessor case, just recurse, we can only have one definition. + MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); + CachedPreviousDef.insert({BB, Result}); + return Result; } else if (VisitedBlocks.count(BB)) { // We hit our node again, meaning we had a cycle, we must insert a phi // node to break it so we have an operand. The only case this will // insert useless phis is if we have irreducible control flow. - return MSSA->createMemoryPhi(BB); + MemoryAccess *Result = MSSA->createMemoryPhi(BB); + CachedPreviousDef.insert({BB, Result}); + return Result; } else if (VisitedBlocks.insert(BB).second) { // Mark us visited so we can detect a cycle SmallVector PhiOps; @@ -54,7 +65,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. for (auto *Pred : predecessors(BB)) - PhiOps.push_back(getPreviousDefFromEnd(Pred)); + PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -90,6 +101,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { // Set ourselves up for the next variable by resetting visited state. VisitedBlocks.erase(BB); + CachedPreviousDef.insert({BB, Result}); return Result; } llvm_unreachable("Should have hit one of the three cases above"); @@ -100,9 +112,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) { // it continues globally, creating phi nodes to ensure we have a single // definition. MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) { - auto *LocalResult = getPreviousDefInBlock(MA); - - return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock()); + if (auto *LocalResult = getPreviousDefInBlock(MA)) + return LocalResult; + DenseMap> CachedPreviousDef; + return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef); } // This starts at the memory access, and goes backwards in the block to the find @@ -133,13 +146,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) { } // This starts at the end of block -MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) { +MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd( + BasicBlock *BB, + DenseMap> &CachedPreviousDef) { auto *Defs = MSSA->getWritableBlockDefs(BB); if (Defs) return &*Defs->rbegin(); - return getPreviousDefRecursive(BB); + return getPreviousDefRecursive(BB, CachedPreviousDef); } // Recurse over a set of phi uses to eliminate the trivial ones MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { @@ -230,10 +245,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { InsertedPHIs.clear(); // See if we had a local def, and if not, go hunting. - MemoryAccess *DefBefore = getPreviousDefInBlock(MD); - bool DefBeforeSameBlock = DefBefore != nullptr; - if (!DefBefore) - DefBefore = getPreviousDefRecursive(MD->getBlock()); + MemoryAccess *DefBefore = getPreviousDef(MD); + bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there diff --git a/llvm/test/Transforms/GVNHoist/hoist-simplify-phi.ll b/llvm/test/Transforms/GVNHoist/hoist-simplify-phi.ll new file mode 100644 index 000000000000..84f10ab79bdd --- /dev/null +++ b/llvm/test/Transforms/GVNHoist/hoist-simplify-phi.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -gvn-hoist -S | FileCheck %s + +; This test is meant to make sure that MemorySSAUpdater works correctly +; in non-trivial cases. + +; CHECK: if.else218: +; CHECK-NEXT: %0 = getelementptr inbounds %s, %s* undef, i32 0, i32 0 +; CHECK-NEXT: %1 = load i32, i32* %0, align 4 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +%s = type { i32, %s**, [3 x i8], i8 } + +define void @test() { +entry: + br label %cond.end118 + +cond.end118: ; preds = %entry + br i1 undef, label %cleanup, label %if.end155 + +if.end155: ; preds = %cond.end118 + br label %while.cond + +while.cond: ; preds = %while.body, %if.end155 + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %while.cond + br label %while.cond + +while.end: ; preds = %while.cond + switch i32 undef, label %if.else218 [ + i32 1, label %cleanup + i32 0, label %if.then174 + ] + +if.then174: ; preds = %while.end + unreachable + +if.else218: ; preds = %while.end + br i1 undef, label %if.then226, label %if.else326 + +if.then226: ; preds = %if.else218 + %size227 = getelementptr inbounds %s, %s* undef, i32 0, i32 0 + %0 = load i32, i32* %size227, align 4 + unreachable + +if.else326: ; preds = %if.else218 + %size330 = getelementptr inbounds %s, %s* undef, i32 0, i32 0 + %1 = load i32, i32* %size330, align 4 + unreachable + +cleanup: ; preds = %while.end, %cond.end118 + ret void +}