From 45172aceb30c5bdc4820776e7b62861526664d66 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 25 Nov 2014 17:23:05 +0000 Subject: [PATCH] LazyValueInfo: Actually re-visit partially solved block-values in solveBlockValue() If solveBlockValue() needs results from predecessors that are not already computed, it returns false with the intention of resuming when the dependencies have been resolved. However, the computation would never be resumed since an 'overdefined' result had been placed in the cache, preventing any further computation. The point of placing the 'overdefined' result in the cache seems to have been to break cycles, but we can check for that when inserting work items in the BlockValue stack instead. This makes the "stop and resume" mechanism of solveBlockValue() work as intended, unlocking more analysis. Using this patch shaves 120 KB off a 64-bit Chromium build on Linux. I benchmarked compiling bzip2.c at -O2 but couldn't measure any difference in compile time. Tests by Jiangning Liu from r215343 / PR21238, Pete Cooper, and me. Differential Revision: http://reviews.llvm.org/D6397 llvm-svn: 222768 --- llvm/lib/Analysis/LazyValueInfo.cpp | 134 ++++++++++-------- .../CorrelatedValuePropagation/icmp.ll | 63 ++++++++ .../JumpThreading/conservative-lvi.ll | 58 ++++++++ 3 files changed, 198 insertions(+), 57 deletions(-) create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll create mode 100644 llvm/test/Transforms/JumpThreading/conservative-lvi.ll diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index f0664bfa9dd5..d56df14355df 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -342,6 +342,21 @@ namespace { /// recursive value lookup process. std::stack > BlockValueStack; + /// BlockValueSet - Keeps track of which block-value pairs are in + /// BlockValueStack. + DenseSet > BlockValueSet; + + /// pushBlockValue - Push BV onto BlockValueStack unless it's already in + /// there. Returns true on success. + bool pushBlockValue(const std::pair &BV) { + if (BlockValueSet.count(BV)) + return false; // It's already in the stack. + + BlockValueStack.push(BV); + BlockValueSet.insert(BV); + return true; + } + /// A pointer to the cache of @llvm.assume calls. AssumptionTracker *AT; /// An optional DL pointer. @@ -350,27 +365,13 @@ namespace { DominatorTree *DT; friend struct LVIValueHandle; - - /// OverDefinedCacheUpdater - A helper object that ensures that the - /// OverDefinedCache is updated whenever solveBlockValue returns. - struct OverDefinedCacheUpdater { - LazyValueInfoCache *Parent; - Value *Val; - BasicBlock *BB; - LVILatticeVal &BBLV; - - OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV, - LazyValueInfoCache *P) - : Parent(P), Val(V), BB(B), BBLV(LV) { } - - bool markResult(bool changed) { - if (changed && BBLV.isOverdefined()) - Parent->OverDefinedCache.insert(std::make_pair(BB, Val)); - return changed; - } - }; - + void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { + SeenBlocks.insert(BB); + lookup(Val)[BB] = Result; + if (Result.isOverdefined()) + OverDefinedCache.insert(std::make_pair(BB, Val)); + } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, @@ -472,9 +473,18 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { void LazyValueInfoCache::solve() { while (!BlockValueStack.empty()) { std::pair &e = BlockValueStack.top(); + assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); + if (solveBlockValue(e.second, e.first)) { - assert(BlockValueStack.top() == e); + // The work item was completely processed. + assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); + assert(lookup(e.second).count(e.first) && "Result should be in cache!"); + BlockValueStack.pop(); + BlockValueSet.erase(e); + } else { + // More work needs to be done before revisiting. + assert(BlockValueStack.top() != e && "Stack should have been pushed!"); } } } @@ -504,43 +514,40 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { if (isa(Val)) return true; - ValueCacheEntryTy &Cache = lookup(Val); - SeenBlocks.insert(BB); - LVILatticeVal &BBLV = Cache[BB]; - - // OverDefinedCacheUpdater is a helper object that will update - // the OverDefinedCache for us when this method exits. Make sure to - // call markResult on it as we exit, passing a bool to indicate if the - // cache needs updating, i.e. if we have solved a new value or not. - OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); + if (lookup(Val).count(BB)) { + // If we have a cached value, use that. + DEBUG(dbgs() << " reuse BB '" << BB->getName() + << "' val=" << lookup(Val)[BB] << '\n'); - if (!BBLV.isUndefined()) { - DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); - - // Since we're reusing a cached value here, we don't need to update the - // OverDefinedCache. The cache will have been properly updated - // whenever the cached value was inserted. - ODCacheUpdater.markResult(false); + // Since we're reusing a cached value, we don't need to update the + // OverDefinedCache. The cache will have been properly updated whenever the + // cached value was inserted. return true; } - // Otherwise, this is the first time we're seeing this block. Reset the - // lattice value to overdefined, so that cycles will terminate and be - // conservatively correct. - BBLV.markOverdefined(); + // Hold off inserting this value into the Cache in case we have to return + // false and come back later. + LVILatticeVal Res; Instruction *BBI = dyn_cast(Val); if (!BBI || BBI->getParent() != BB) { - return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); + if (!solveBlockValueNonLocal(Res, Val, BB)) + return false; + insertResult(Val, BB, Res); + return true; } if (PHINode *PN = dyn_cast(BBI)) { - return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB)); + if (!solveBlockValuePHINode(Res, PN, BB)) + return false; + insertResult(Val, BB, Res); + return true; } if (AllocaInst *AI = dyn_cast(BBI)) { - BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); - return ODCacheUpdater.markResult(true); + Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + insertResult(Val, BB, Res); + return true; } // We can only analyze the definitions of certain classes of instructions @@ -550,8 +557,9 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { !BBI->getType()->isIntegerTy()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - BBLV.markOverdefined(); - return ODCacheUpdater.markResult(true); + Res.markOverdefined(); + insertResult(Val, BB, Res); + return true; } // FIXME: We're currently limited to binops with a constant RHS. This should @@ -561,11 +569,15 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because inst def found.\n"); - BBLV.markOverdefined(); - return ODCacheUpdater.markResult(true); + Res.markOverdefined(); + insertResult(Val, BB, Res); + return true; } - return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB)); + if (!solveBlockValueConstantRange(Res, BBI, BB)) + return false; + insertResult(Val, BB, Res); + return true; } static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { @@ -745,8 +757,10 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, BasicBlock *BB) { // Figure out the range of the LHS. If that fails, bail. if (!hasBlockValue(BBI->getOperand(0), BB)) { - BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0))); - return false; + if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + return false; + BBLV.markOverdefined(); + return true; } LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); @@ -940,8 +954,10 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // LVI better supports recursive values. Even for the single value case, we // can intersect to detect dead code (an empty range). if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; + if (pushBlockValue(std::make_pair(BBFrom, Val))) + return false; + Result.markOverdefined(); + return true; } // Try to intersect ranges of the BB and the constraint on the edge. @@ -960,8 +976,10 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, } if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; + if (pushBlockValue(std::make_pair(BBFrom, Val))) + return false; + Result.markOverdefined(); + return true; } // If we couldn't compute the value on the edge, use the value from the BB. @@ -984,7 +1002,9 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - BlockValueStack.push(std::make_pair(BB, V)); + assert(BlockValueStack.empty() && BlockValueSet.empty()); + pushBlockValue(std::make_pair(BB, V)); + solve(); LVILatticeVal Result = getBlockValue(V, BB); mergeAssumeBlockValueConstantRange(V, Result, CxtI); diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll new file mode 100644 index 000000000000..c2863ffda0fb --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -0,0 +1,63 @@ +; RUN: opt -correlated-propagation -S %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; Function Attrs: noreturn +declare void @check1(i1) #1 + +; Function Attrs: noreturn +declare void @check2(i1) #1 + +; Make sure we propagate the value of %tmp35 to the true/false cases +; CHECK-LABEL: @test1 +; CHECK: call void @check1(i1 false) +; CHECK: call void @check2(i1 true) +define void @test1(i64 %tmp35) { +bb: + %tmp36 = icmp sgt i64 %tmp35, 0 + br i1 %tmp36, label %bb_true, label %bb_false + +bb_true: + %tmp47 = icmp slt i64 %tmp35, 0 + tail call void @check1(i1 %tmp47) #4 + unreachable + +bb_false: + %tmp48 = icmp sle i64 %tmp35, 0 + tail call void @check2(i1 %tmp48) #4 + unreachable +} + +; Function Attrs: noreturn +; This is the same as test1 but with a diamond to ensure we +; get %tmp36 from both true and false BBs. +; CHECK-LABEL: @test2 +; CHECK: call void @check1(i1 false) +; CHECK: call void @check2(i1 true) +define void @test2(i64 %tmp35, i1 %inner_cmp) { +bb: + %tmp36 = icmp sgt i64 %tmp35, 0 + br i1 %tmp36, label %bb_true, label %bb_false + +bb_true: + br i1 %inner_cmp, label %inner_true, label %inner_false + +inner_true: + br label %merge + +inner_false: + br label %merge + +merge: + %tmp47 = icmp slt i64 %tmp35, 0 + tail call void @check1(i1 %tmp47) #0 + unreachable + +bb_false: + %tmp48 = icmp sle i64 %tmp35, 0 + tail call void @check2(i1 %tmp48) #4 + unreachable +} + +attributes #4 = { noreturn } diff --git a/llvm/test/Transforms/JumpThreading/conservative-lvi.ll b/llvm/test/Transforms/JumpThreading/conservative-lvi.ll new file mode 100644 index 000000000000..1ea8cdc4693a --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/conservative-lvi.ll @@ -0,0 +1,58 @@ +; RUN: opt -jump-threading -S %s | FileCheck %s + +; Check that we thread arg2neg -> checkpos -> end. +; +; LazyValueInfo would previously fail to analyze the value of %arg in arg2neg +; because its predecessing blocks (checkneg) hadn't been processed yet (PR21238) + +; CHECK-LABEL: @test_jump_threading +; CHECK: arg2neg: +; CHECK-NEXT: br i1 %arg1, label %end, label %checkpos.thread +; CHECK: checkpos.thread: +; CHECK-NEXT: br label %end + +define i32 @test_jump_threading(i1 %arg1, i32 %arg2) { +checkneg: + %cmp = icmp slt i32 %arg2, 0 + br i1 %cmp, label %arg2neg, label %checkpos + +arg2neg: + br i1 %arg1, label %end, label %checkpos + +checkpos: + %cmp2 = icmp sgt i32 %arg2, 0 + br i1 %cmp2, label %arg2pos, label %end + +arg2pos: + br label %end + +end: + %0 = phi i32 [ 1, %arg2neg ], [ 2, %checkpos ], [ 3, %arg2pos ] + ret i32 %0 +} + + +; arg2neg has an edge back to itself. If LazyValueInfo is not careful when +; visiting predecessors, it could get into an infinite loop. + +; CHECK-LABEL: test_infinite_loop + +define i32 @test_infinite_loop(i1 %arg1, i32 %arg2) { +checkneg: + %cmp = icmp slt i32 %arg2, 0 + br i1 %cmp, label %arg2neg, label %checkpos + +arg2neg: + br i1 %arg1, label %arg2neg, label %checkpos + +checkpos: + %cmp2 = icmp sgt i32 %arg2, 0 + br i1 %cmp2, label %arg2pos, label %end + +arg2pos: + br label %end + +end: + %0 = phi i32 [ 2, %checkpos ], [ 3, %arg2pos ] + ret i32 %0 +}