[MemorySSA] Fix exponential compile-time updating MemorySSA.

MemorySSAUpdater::getPreviousDefRecursive is a recursive algorithm, for
each block, it computes the previous definition for each predecessor,
then takes those definitions and combines them. But currently it doesn't
remember results which it already computed; this means it can visit the
same block multiple times, which adds up to exponential time overall.

To fix this, this patch adds a cache. If we computed the result for a
block already, we don't need to visit it again because we'll come up
with the same result. Well, unless we RAUW a MemoryPHI; in that case,
the TrackingVH will be updated automatically.

This matches the original source paper for this algorithm.

The testcase isn't really a test for the bug, but it adds coverage for
the case where tryRemoveTrivialPhi erases an existing PHI node. (It's
hard to write a good regression test for a performance issue.)

Differential Revision: https://reviews.llvm.org/D44715

llvm-svn: 328577
This commit is contained in:
Eli Friedman 2018-03-26 19:52:54 +00:00
parent 93ce24838c
commit 88e2bac94d
3 changed files with 89 additions and 17 deletions

View File

@ -43,6 +43,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@ -141,8 +142,12 @@ private:
void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where);
MemoryAccess *getPreviousDef(MemoryAccess *);
MemoryAccess *getPreviousDefInBlock(MemoryAccess *);
MemoryAccess *getPreviousDefFromEnd(BasicBlock *);
MemoryAccess *getPreviousDefRecursive(BasicBlock *);
MemoryAccess *
getPreviousDefFromEnd(BasicBlock *,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
MemoryAccess *
getPreviousDefRecursive(BasicBlock *,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
MemoryAccess *recursePhi(MemoryAccess *Phi);
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);

View File

@ -37,15 +37,26 @@ using namespace llvm;
// that there are two or more definitions needing to be merged.
// This still will leave non-minimal form in the case of irreducible control
// flow, where phi nodes may be in cycles with themselves, but unnecessary.
MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
BasicBlock *BB,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
// First, do a cache lookup. Without this cache, certain CFG structures
// (like a series of if statements) take exponential time to visit.
auto Cached = CachedPreviousDef.find(BB);
if (Cached != CachedPreviousDef.end()) {
return Cached->second;
} else if (BasicBlock *Pred = BB->getSinglePredecessor()) {
// Single predecessor case, just recurse, we can only have one definition.
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
return getPreviousDefFromEnd(Pred);
MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
CachedPreviousDef.insert({BB, Result});
return Result;
} else if (VisitedBlocks.count(BB)) {
// We hit our node again, meaning we had a cycle, we must insert a phi
// node to break it so we have an operand. The only case this will
// insert useless phis is if we have irreducible control flow.
return MSSA->createMemoryPhi(BB);
MemoryAccess *Result = MSSA->createMemoryPhi(BB);
CachedPreviousDef.insert({BB, Result});
return Result;
} else if (VisitedBlocks.insert(BB).second) {
// Mark us visited so we can detect a cycle
SmallVector<MemoryAccess *, 8> PhiOps;
@ -54,7 +65,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
// potential phi node. This will insert phi nodes if we cycle in order to
// break the cycle and have an operand.
for (auto *Pred : predecessors(BB))
PhiOps.push_back(getPreviousDefFromEnd(Pred));
PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we never created a phi yet, that's okay
@ -90,6 +101,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
// Set ourselves up for the next variable by resetting visited state.
VisitedBlocks.erase(BB);
CachedPreviousDef.insert({BB, Result});
return Result;
}
llvm_unreachable("Should have hit one of the three cases above");
@ -100,9 +112,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
// it continues globally, creating phi nodes to ensure we have a single
// definition.
MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
auto *LocalResult = getPreviousDefInBlock(MA);
return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock());
if (auto *LocalResult = getPreviousDefInBlock(MA))
return LocalResult;
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef);
}
// This starts at the memory access, and goes backwards in the block to the find
@ -133,13 +146,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
}
// This starts at the end of block
MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) {
MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
BasicBlock *BB,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
auto *Defs = MSSA->getWritableBlockDefs(BB);
if (Defs)
return &*Defs->rbegin();
return getPreviousDefRecursive(BB);
return getPreviousDefRecursive(BB, CachedPreviousDef);
}
// Recurse over a set of phi uses to eliminate the trivial ones
MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
@ -230,10 +245,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
InsertedPHIs.clear();
// See if we had a local def, and if not, go hunting.
MemoryAccess *DefBefore = getPreviousDefInBlock(MD);
bool DefBeforeSameBlock = DefBefore != nullptr;
if (!DefBefore)
DefBefore = getPreviousDefRecursive(MD->getBlock());
MemoryAccess *DefBefore = getPreviousDef(MD);
bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock();
// There is a def before us, which means we can replace any store/phi uses
// of that thing with us, since we are in the way of whatever was there

View File

@ -0,0 +1,54 @@
; RUN: opt < %s -gvn-hoist -S | FileCheck %s
; This test is meant to make sure that MemorySSAUpdater works correctly
; in non-trivial cases.
; CHECK: if.else218:
; CHECK-NEXT: %0 = getelementptr inbounds %s, %s* undef, i32 0, i32 0
; CHECK-NEXT: %1 = load i32, i32* %0, align 4
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
%s = type { i32, %s**, [3 x i8], i8 }
define void @test() {
entry:
br label %cond.end118
cond.end118: ; preds = %entry
br i1 undef, label %cleanup, label %if.end155
if.end155: ; preds = %cond.end118
br label %while.cond
while.cond: ; preds = %while.body, %if.end155
br i1 undef, label %while.end, label %while.body
while.body: ; preds = %while.cond
br label %while.cond
while.end: ; preds = %while.cond
switch i32 undef, label %if.else218 [
i32 1, label %cleanup
i32 0, label %if.then174
]
if.then174: ; preds = %while.end
unreachable
if.else218: ; preds = %while.end
br i1 undef, label %if.then226, label %if.else326
if.then226: ; preds = %if.else218
%size227 = getelementptr inbounds %s, %s* undef, i32 0, i32 0
%0 = load i32, i32* %size227, align 4
unreachable
if.else326: ; preds = %if.else218
%size330 = getelementptr inbounds %s, %s* undef, i32 0, i32 0
%1 = load i32, i32* %size330, align 4
unreachable
cleanup: ; preds = %while.end, %cond.end118
ret void
}