forked from OSchip/llvm-project
Resubmit: [JumpThreading] Thread jumps through two basic blocks
This reverts commit 2d258ed931
. This
revision fixes the Windows build and adds a testcase for it, namely
thread-two-bbs3.ll. My original patch improperly copied EH pads on
Windows. This patch disregards jump threading opportunities having to
do with EH pads.
[JumpThreading] Thread jumps through two basic blocks
Summary:
This patch teaches JumpThreading.cpp to thread through two basic
blocks like:
bb3:
%var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %bb4, label ...
bb4:
%cmp = icmp eq i32* %var, null
br i1 %cmp, label bb5, label bb6
by duplicating basic blocks like bb3 above. Once we duplicate bb3 as
bb3.dup and redirect edge bb2->bb3 to bb2->bb3.dup, we have:
bb3:
%var = phi i32* [ @a, %bb2 ]
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %bb4, label ...
bb3.dup:
%var = phi i32* [ null, %bb1 ]
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %bb4, label ...
bb4:
%cmp = icmp eq i32* %var, null
br i1 %cmp, label bb5, label bb6
Then the existing code in JumpThreading.cpp can thread edge
bb3.dup->bb4 through bb4 and eventually create bb3.dup->bb5.
Reviewers: wmi
Subscribers: hiraditya, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70247
This commit is contained in:
parent
6f3effbbf0
commit
53b68e676f
|
@ -139,6 +139,11 @@ public:
|
|||
RecursionSet, CxtI);
|
||||
}
|
||||
|
||||
Constant *EvaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
|
||||
Value *cond);
|
||||
bool MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond);
|
||||
void ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB,
|
||||
BasicBlock *BB, BasicBlock *SuccBB);
|
||||
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
|
||||
jumpthreading::ConstantPreference Preference,
|
||||
Instruction *CxtI = nullptr);
|
||||
|
|
|
@ -1548,6 +1548,52 @@ FindMostPopularDest(BasicBlock *BB,
|
|||
return MostPopularDest;
|
||||
}
|
||||
|
||||
// Try to evaluate the value of V when the control flows from PredPredBB to
|
||||
// BB->getSinglePredecessor() and then on to BB.
|
||||
Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
|
||||
BasicBlock *PredPredBB,
|
||||
Value *V) {
|
||||
BasicBlock *PredBB = BB->getSinglePredecessor();
|
||||
assert(PredBB && "Expected a single predecessor");
|
||||
|
||||
if (Constant *Cst = dyn_cast<Constant>(V)) {
|
||||
return Cst;
|
||||
}
|
||||
|
||||
// Consult LVI if V is not an instruction in BB or PredBB.
|
||||
Instruction *I = dyn_cast<Instruction>(V);
|
||||
if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
|
||||
if (DTU->hasPendingDomTreeUpdates())
|
||||
LVI->disableDT();
|
||||
else
|
||||
LVI->enableDT();
|
||||
return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
|
||||
}
|
||||
|
||||
// Look into a PHI argument.
|
||||
if (PHINode *PHI = dyn_cast<PHINode>(V)) {
|
||||
if (PHI->getParent() == PredBB)
|
||||
return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// If we have a CmpInst, try to fold it for each incoming edge into PredBB.
|
||||
if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
|
||||
if (CondCmp->getParent() == BB) {
|
||||
Constant *Op0 =
|
||||
EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
|
||||
Constant *Op1 =
|
||||
EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
|
||||
if (Op0 && Op1) {
|
||||
return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
|
||||
ConstantPreference Preference,
|
||||
Instruction *CxtI) {
|
||||
|
@ -1557,8 +1603,12 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
|
|||
return false;
|
||||
|
||||
PredValueInfoTy PredValues;
|
||||
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
|
||||
return false;
|
||||
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
|
||||
CxtI)) {
|
||||
// We don't have known values in predecessors. See if we can thread through
|
||||
// BB and its sole predecessor.
|
||||
return MaybeThreadThroughTwoBasicBlocks(BB, Cond);
|
||||
}
|
||||
|
||||
assert(!PredValues.empty() &&
|
||||
"ComputeValueKnownInPredecessors returned true with no values");
|
||||
|
@ -2015,6 +2065,186 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
|
|||
return ValueMapping;
|
||||
}
|
||||
|
||||
/// Attempt to thread through two successive basic blocks.
|
||||
bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
|
||||
Value *Cond) {
|
||||
// Consider:
|
||||
//
|
||||
// PredBB:
|
||||
// %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
|
||||
// %tobool = icmp eq i32 %cond, 0
|
||||
// br i1 %tobool, label %BB, label ...
|
||||
//
|
||||
// BB:
|
||||
// %cmp = icmp eq i32* %var, null
|
||||
// br i1 %cmp, label ..., label ...
|
||||
//
|
||||
// We don't know the value of %var at BB even if we know which incoming edge
|
||||
// we take to BB. However, once we duplicate PredBB for each of its incoming
|
||||
// edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
|
||||
// PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
|
||||
|
||||
// Require that BB end with a Branch for simplicity.
|
||||
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
|
||||
if (!CondBr)
|
||||
return false;
|
||||
|
||||
// BB must have exactly one predecessor.
|
||||
BasicBlock *PredBB = BB->getSinglePredecessor();
|
||||
if (!PredBB)
|
||||
return false;
|
||||
|
||||
// Require that PredBB end with a Branch. If PredBB ends with an
|
||||
// unconditional branch, we should be merging PredBB and BB instead. For
|
||||
// simplicity, we don't deal with a switch.
|
||||
BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
|
||||
if (!PredBBBranch)
|
||||
return false;
|
||||
|
||||
// If PredBB has exactly one incoming edge, we don't gain anything by copying
|
||||
// PredBB.
|
||||
if (PredBB->getSinglePredecessor())
|
||||
return false;
|
||||
|
||||
// Don't thread across a loop header.
|
||||
if (LoopHeaders.count(PredBB))
|
||||
return false;
|
||||
|
||||
// Avoid complication with duplicating EH pads.
|
||||
if (PredBB->isEHPad())
|
||||
return false;
|
||||
|
||||
// Find a predecessor that we can thread. For simplicity, we only consider a
|
||||
// successor edge out of BB to which we thread exactly one incoming edge into
|
||||
// PredBB.
|
||||
unsigned ZeroCount = 0;
|
||||
unsigned OneCount = 0;
|
||||
BasicBlock *ZeroPred = nullptr;
|
||||
BasicBlock *OnePred = nullptr;
|
||||
for (BasicBlock *P : predecessors(PredBB)) {
|
||||
if (Constant *Cst = EvaluateOnPredecessorEdge(BB, P, Cond)) {
|
||||
if (Cst->isZeroValue()) {
|
||||
ZeroCount++;
|
||||
ZeroPred = P;
|
||||
} else {
|
||||
OneCount++;
|
||||
OnePred = P;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Disregard complicated cases where we have to thread multiple edges.
|
||||
BasicBlock *PredPredBB;
|
||||
if (ZeroCount == 1) {
|
||||
PredPredBB = ZeroPred;
|
||||
} else if (OneCount == 1) {
|
||||
PredPredBB = OnePred;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
|
||||
|
||||
// If threading to the same block as we come from, we would infinite loop.
|
||||
if (SuccBB == BB) {
|
||||
LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
|
||||
<< "' - would thread to self!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// If threading this would thread across a loop header, don't thread the edge.
|
||||
// See the comments above FindLoopHeaders for justifications and caveats.
|
||||
if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
|
||||
LLVM_DEBUG({
|
||||
bool BBIsHeader = LoopHeaders.count(BB);
|
||||
bool SuccIsHeader = LoopHeaders.count(SuccBB);
|
||||
dbgs() << " Not threading across "
|
||||
<< (BBIsHeader ? "loop header BB '" : "block BB '")
|
||||
<< BB->getName() << "' to dest "
|
||||
<< (SuccIsHeader ? "loop header BB '" : "block BB '")
|
||||
<< SuccBB->getName()
|
||||
<< "' - it might create an irreducible loop!\n";
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check the cost of duplicating BB and PredBB.
|
||||
unsigned JumpThreadCost =
|
||||
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
|
||||
JumpThreadCost += getJumpThreadDuplicationCost(
|
||||
PredBB, PredBB->getTerminator(), BBDupThreshold);
|
||||
if (JumpThreadCost > BBDupThreshold) {
|
||||
LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
|
||||
<< "' - Cost is too high: " << JumpThreadCost << "\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Now we are ready to duplicate PredBB.
|
||||
ThreadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
|
||||
return true;
|
||||
}
|
||||
|
||||
void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
|
||||
BasicBlock *PredBB,
|
||||
BasicBlock *BB,
|
||||
BasicBlock *SuccBB) {
|
||||
LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
|
||||
<< BB->getName() << "'\n");
|
||||
|
||||
BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
|
||||
BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
|
||||
|
||||
BasicBlock *NewBB =
|
||||
BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
|
||||
PredBB->getParent(), PredBB);
|
||||
NewBB->moveAfter(PredBB);
|
||||
|
||||
// Set the block frequency of NewBB.
|
||||
if (HasProfileData) {
|
||||
auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
|
||||
BPI->getEdgeProbability(PredPredBB, PredBB);
|
||||
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
|
||||
}
|
||||
|
||||
// We are going to have to map operands from the original BB block to the new
|
||||
// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
|
||||
// to account for entry from PredPredBB.
|
||||
DenseMap<Instruction *, Value *> ValueMapping =
|
||||
CloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
|
||||
|
||||
// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
|
||||
// This eliminates predecessors from PredPredBB, which requires us to simplify
|
||||
// any PHI nodes in PredBB.
|
||||
Instruction *PredPredTerm = PredPredBB->getTerminator();
|
||||
for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
|
||||
if (PredPredTerm->getSuccessor(i) == PredBB) {
|
||||
PredBB->removePredecessor(PredPredBB, true);
|
||||
PredPredTerm->setSuccessor(i, NewBB);
|
||||
}
|
||||
|
||||
AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
|
||||
ValueMapping);
|
||||
AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
|
||||
ValueMapping);
|
||||
|
||||
DTU->applyUpdatesPermissive(
|
||||
{{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
|
||||
{DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
|
||||
{DominatorTree::Insert, PredPredBB, NewBB},
|
||||
{DominatorTree::Delete, PredPredBB, PredBB}});
|
||||
|
||||
UpdateSSA(PredBB, NewBB, ValueMapping);
|
||||
|
||||
// Clean up things like PHI nodes with single operands, dead instructions,
|
||||
// etc.
|
||||
SimplifyInstructionsInBlock(NewBB, TLI);
|
||||
SimplifyInstructionsInBlock(PredBB, TLI);
|
||||
|
||||
SmallVector<BasicBlock *, 1> PredsToFactor;
|
||||
PredsToFactor.push_back(NewBB);
|
||||
ThreadEdge(BB, PredsToFactor, SuccBB);
|
||||
}
|
||||
|
||||
/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
|
||||
bool JumpThreadingPass::TryThreadEdge(
|
||||
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@a = global i32 0, align 4
|
||||
|
||||
define void @foo(i32 %cond1, i32 %cond2) {
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-LABEL: entry
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond1, 0
|
||||
br i1 %tobool, label %bb.cond2, label %bb.f1
|
||||
|
||||
bb.f1:
|
||||
call void @f1()
|
||||
br label %bb.cond2
|
||||
; Verify that we branch on cond2 without checking ptr.
|
||||
; CHECK: call void @f1()
|
||||
; CHECK-NEXT: icmp eq i32 %cond2, 0
|
||||
; CHECK-NEXT: label %bb.f4, label %bb.f2
|
||||
|
||||
bb.cond2:
|
||||
%ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ]
|
||||
%tobool1 = icmp eq i32 %cond2, 0
|
||||
br i1 %tobool1, label %bb.file, label %bb.f2
|
||||
; Verify that we branch on cond2 without checking ptr.
|
||||
; CHECK: icmp eq i32 %cond2, 0
|
||||
; CHECK-NEXT: label %bb.f3, label %bb.f2
|
||||
|
||||
bb.f2:
|
||||
call void @f2()
|
||||
br label %exit
|
||||
|
||||
; Verify that we eliminate this basic block.
|
||||
; CHECK-NOT: bb.file:
|
||||
bb.file:
|
||||
%cmp = icmp eq i32* %ptr, null
|
||||
br i1 %cmp, label %bb.f4, label %bb.f3
|
||||
|
||||
bb.f3:
|
||||
call void @f3()
|
||||
br label %exit
|
||||
|
||||
bb.f4:
|
||||
call void @f4()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @f1()
|
||||
|
||||
declare void @f2()
|
||||
|
||||
declare void @f3()
|
||||
|
||||
declare void @f4()
|
|
@ -0,0 +1,56 @@
|
|||
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @foo(i32 %cond1, i32 %cond2) {
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-LABEL: entry
|
||||
entry:
|
||||
%tobool = icmp ne i32 %cond1, 0
|
||||
br i1 %tobool, label %bb.f1, label %bb.f2
|
||||
|
||||
bb.f1:
|
||||
call void @f1()
|
||||
br label %bb.cond2
|
||||
; Verify that we branch on cond2 without checking tobool again.
|
||||
; CHECK: call void @f1()
|
||||
; CHECK-NEXT: icmp eq i32 %cond2, 0
|
||||
; CHECK-NEXT: label %exit, label %bb.f3
|
||||
|
||||
bb.f2:
|
||||
call void @f2()
|
||||
br label %bb.cond2
|
||||
; Verify that we branch on cond2 without checking tobool again.
|
||||
; CHECK: call void @f2()
|
||||
; CHECK-NEXT: icmp eq i32 %cond2, 0
|
||||
; CHECK-NEXT: label %exit, label %bb.f4
|
||||
|
||||
bb.cond2:
|
||||
%tobool1 = icmp eq i32 %cond2, 0
|
||||
br i1 %tobool1, label %exit, label %bb.cond1again
|
||||
|
||||
; Verify that we eliminate this basic block.
|
||||
; CHECK-NOT: bb.cond1again:
|
||||
bb.cond1again:
|
||||
br i1 %tobool, label %bb.f3, label %bb.f4
|
||||
|
||||
bb.f3:
|
||||
call void @f3()
|
||||
br label %exit
|
||||
|
||||
bb.f4:
|
||||
call void @f4()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @f1() local_unnamed_addr
|
||||
|
||||
declare void @f2() local_unnamed_addr
|
||||
|
||||
declare void @f3() local_unnamed_addr
|
||||
|
||||
declare void @f4() local_unnamed_addr
|
|
@ -0,0 +1,39 @@
|
|||
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-pc-windows-msvc19.16.27026"
|
||||
|
||||
; Verify that we do *not* thread any edge. On Windows, we used to
|
||||
; improperly duplicate EH pads like bb_cleanup below, resulting in an
|
||||
; assertion failure later down the pass pipeline.
|
||||
define void @foo([2 x i8]* %0) personality i8* bitcast (i32 ()* @baz to i8*) {
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-NOT: bb_{{[^ ]*}}.thread:
|
||||
entry:
|
||||
invoke void @bar()
|
||||
to label %bb_invoke unwind label %bb_cleanuppad
|
||||
|
||||
bb_invoke:
|
||||
invoke void @bar()
|
||||
to label %bb_exit unwind label %bb_cleanuppad
|
||||
|
||||
bb_cleanuppad:
|
||||
%index = phi i64 [ 1, %bb_invoke ], [ 0, %entry ]
|
||||
%cond1 = phi i1 [ false, %bb_invoke ], [ true, %entry ]
|
||||
%1 = cleanuppad within none []
|
||||
br i1 %cond1, label %bb_action, label %bb_cleanupret
|
||||
|
||||
bb_action:
|
||||
%cond2 = icmp eq i64 %index, 0
|
||||
br i1 %cond2, label %bb_cleanupret, label %bb_exit
|
||||
|
||||
bb_exit:
|
||||
call void @bar()
|
||||
ret void
|
||||
|
||||
bb_cleanupret:
|
||||
cleanupret from %1 unwind to caller
|
||||
}
|
||||
|
||||
declare void @bar()
|
||||
declare i32 @baz()
|
Loading…
Reference in New Issue