From 9394c3ec881a974afe679467f79a891548eadb89 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Jul 2020 21:26:30 +0200 Subject: [PATCH] [SCCP] Directly remove non-feasible edges Non-feasible control-flow edges are currently removed by replacing the branch condition with a constant and then calling ConstantFoldTerminator. This happens in a rather roundabout manner, by inspecting the users (effectively: predecessors) of unreachable blocks, and further complicated by the need to explicitly materialize the condition for "forced" edges. I would like to extend SCCP to discard switch conditions that are non-feasible based on range information, but this is incompatible with the current approach (as there is no single constant we could use.) Instead, this patch explicitly removes non-feasible edges. It currently only needs to handle the case where there is a single feasible edge. The llvm_unreachable() branch will need to be implemented for the aforementioned switch improvement. Differential Revision: https://reviews.llvm.org/D84264 --- llvm/lib/Transforms/Scalar/SCCP.cpp | 116 +++++++----------- .../test/Transforms/SCCP/conditions-ranges.ll | 4 +- .../Transforms/SCCP/predicateinfo-cond.ll | 2 +- .../SCCP/resolvedundefsin-tracked-fn.ll | 8 +- .../SCCP/switch-constantfold-crash.ll | 12 +- llvm/test/Transforms/SCCP/switch.ll | 8 +- llvm/test/Transforms/SCCP/widening.ll | 14 +-- 7 files changed, 68 insertions(+), 96 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 32dc14e5ec19..b76f67b19e0e 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -276,7 +276,7 @@ public: // isEdgeFeasible - Return true if the control flow edge from the 'From' basic // block to the 'To' basic block is currently feasible. - bool isEdgeFeasible(BasicBlock *From, BasicBlock *To); + bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const; std::vector getStructLatticeValueFor(Value *V) const { std::vector StructValues; @@ -705,7 +705,7 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI, // isEdgeFeasible - Return true if the control flow edge from the 'From' basic // block to the 'To' basic block is currently feasible. -bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { +bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const { // Check if we've called markEdgeExecutable on the edge yet. (We could // be more aggressive and try to consider edges which haven't been marked // yet, but there isn't any need.) @@ -1807,39 +1807,51 @@ static void findReturnsToZap(Function &F, } } -// Update the condition for terminators that are branching on indeterminate -// values, forcing them to use a specific edge. -static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) { - BasicBlock *Dest = nullptr; - Constant *C = nullptr; - if (SwitchInst *SI = dyn_cast(I)) { - if (!isa(SI->getCondition())) { - // Indeterminate switch; use first case value. - Dest = SI->case_begin()->getCaseSuccessor(); - C = SI->case_begin()->getCaseValue(); - } - } else if (BranchInst *BI = dyn_cast(I)) { - if (!isa(BI->getCondition())) { - // Indeterminate branch; use false. - Dest = BI->getSuccessor(1); - C = ConstantInt::getFalse(BI->getContext()); - } - } else if (IndirectBrInst *IBR = dyn_cast(I)) { - if (!isa(IBR->getAddress()->stripPointerCasts())) { - // Indeterminate indirectbr; use successor 0. - Dest = IBR->getSuccessor(0); - C = BlockAddress::get(IBR->getSuccessor(0)); - } - } else { - llvm_unreachable("Unexpected terminator instruction"); +static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, + DomTreeUpdater &DTU) { + SmallPtrSet FeasibleSuccessors; + bool HasNonFeasibleEdges = false; + for (BasicBlock *Succ : successors(BB)) { + if (Solver.isEdgeFeasible(BB, Succ)) + FeasibleSuccessors.insert(Succ); + else + HasNonFeasibleEdges = true; } - if (C) { - assert(Solver.isEdgeFeasible(I->getParent(), Dest) && - "Didn't find feasible edge?"); - (void)Dest; - I->setOperand(0, C); + // All edges feasible, nothing to do. + if (!HasNonFeasibleEdges) + return false; + + // SCCP can only determine non-feasible edges for br, switch and indirectbr. + Instruction *TI = BB->getTerminator(); + assert((isa(TI) || isa(TI) || + isa(TI)) && + "Terminator must be a br, switch or indirectbr"); + + if (FeasibleSuccessors.size() == 1) { + // Replace with an unconditional branch to the only feasible successor. + BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin(); + SmallVector Updates; + bool HaveSeenOnlyFeasibleSuccessor = false; + for (BasicBlock *Succ : successors(BB)) { + if (Succ == OnlyFeasibleSuccessor && !HaveSeenOnlyFeasibleSuccessor) { + // Don't remove the edge to the only feasible successor the first time + // we see it. We still do need to remove any multi-edges to it though. + HaveSeenOnlyFeasibleSuccessor = true; + continue; + } + + Succ->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } + + DTU.applyUpdatesPermissive(Updates); + BranchInst::Create(OnlyFeasibleSuccessor, BB); + TI->eraseFromParent(); + } else { + llvm_unreachable("Either all successors are feasible, or exactly one is"); } + return true; } bool llvm::runIPSCCP( @@ -1952,45 +1964,11 @@ bool llvm::runIPSCCP( /*UseLLVMTrap=*/false, /*PreserveLCSSA=*/false, &DTU); - // Now that all instructions in the function are constant folded, - // use ConstantFoldTerminator to get rid of in-edges, record DT updates and - // delete dead BBs. - for (BasicBlock *DeadBB : BlocksToErase) { - // If there are any PHI nodes in this successor, drop entries for BB now. - for (Value::user_iterator UI = DeadBB->user_begin(), - UE = DeadBB->user_end(); - UI != UE;) { - // Grab the user and then increment the iterator early, as the user - // will be deleted. Step past all adjacent uses from the same user. - auto *I = dyn_cast(*UI); - do { ++UI; } while (UI != UE && *UI == I); + for (BasicBlock &BB : F) + removeNonFeasibleEdges(Solver, &BB, DTU); - // Ignore blockaddress users; BasicBlock's dtor will handle them. - if (!I) continue; - - // If we have forced an edge for an indeterminate value, then force the - // terminator to fold to that edge. - forceIndeterminateEdge(I, Solver); - BasicBlock *InstBB = I->getParent(); - bool Folded = ConstantFoldTerminator(InstBB, - /*DeleteDeadConditions=*/false, - /*TLI=*/nullptr, &DTU); - assert(Folded && - "Expect TermInst on constantint or blockaddress to be folded"); - (void) Folded; - // If we folded the terminator to an unconditional branch to another - // dead block, replace it with Unreachable, to avoid trying to fold that - // branch again. - BranchInst *BI = cast(InstBB->getTerminator()); - if (BI && BI->isUnconditional() && - !Solver.isBlockExecutable(BI->getSuccessor(0))) { - InstBB->getTerminator()->eraseFromParent(); - new UnreachableInst(InstBB->getContext(), InstBB); - } - } - // Mark dead BB for deletion. + for (BasicBlock *DeadBB : BlocksToErase) DTU.deleteBB(DeadBB); - } for (BasicBlock &BB : F) { for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) { diff --git a/llvm/test/Transforms/SCCP/conditions-ranges.ll b/llvm/test/Transforms/SCCP/conditions-ranges.ll index 612a38f008fc..dada59099d81 100644 --- a/llvm/test/Transforms/SCCP/conditions-ranges.ll +++ b/llvm/test/Transforms/SCCP/conditions-ranges.ll @@ -231,12 +231,12 @@ define void @f7_nested_conds(i32* %a, i32 %b) { ; CHECK-NEXT: [[C_1:%.*]] = icmp ne i32 [[A_V]], 0 ; CHECK-NEXT: br i1 [[C_1]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: false: -; CHECK-NEXT: br i1 true, label [[TRUE_2:%.*]], label [[TRUE]] +; CHECK-NEXT: br label [[TRUE_2:%.*]] ; CHECK: true.2: ; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; CHECK: true: -; CHECK-NEXT: store i32 [[B:%.*]], i32* [[A]] +; CHECK-NEXT: store i32 [[B:%.*]], i32* [[A]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SCCP/predicateinfo-cond.ll b/llvm/test/Transforms/SCCP/predicateinfo-cond.ll index 8ed96ec9301f..1443cc72c2ef 100644 --- a/llvm/test/Transforms/SCCP/predicateinfo-cond.ll +++ b/llvm/test/Transforms/SCCP/predicateinfo-cond.ll @@ -105,7 +105,7 @@ define void @pr46814(i32 %a) { ; CHECK-NEXT: [[C3:%.*]] = and i1 [[C1]], [[C2]] ; CHECK-NEXT: br i1 [[C3]], label [[IF_1:%.*]], label [[EXIT:%.*]] ; CHECK: if.1: -; CHECK-NEXT: br i1 true, label [[IF_2:%.*]], label [[EXIT]] +; CHECK-NEXT: br label [[IF_2:%.*]] ; CHECK: if.2: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll b/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll index e1c7b3d5662d..9e9d1256c4cc 100644 --- a/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll +++ b/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll @@ -136,13 +136,12 @@ define internal i1 @test2_g(%t1* %h, i32 %i) { ; CHECK-LABEL: define {{[^@]+}}@test2_g ; CHECK-SAME: (%t1* [[H:%.*]], i32 [[I:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 true, label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] +; CHECK-NEXT: br label [[LAND_RHS:%.*]] ; CHECK: land.rhs: ; CHECK-NEXT: [[CALL:%.*]] = call i32 (...) @test2_j() ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i32 [[CALL]], 0 -; CHECK-NEXT: br label [[LAND_END]] +; CHECK-NEXT: br label [[LAND_END:%.*]] ; CHECK: land.end: -; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL1]], [[LAND_RHS]] ] ; CHECK-NEXT: ret i1 undef ; entry: @@ -196,10 +195,9 @@ define internal i32 @test3_k(i8 %h, i32 %i) { ; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[CONV]] to %t1* ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ undef, [[ENTRY:%.*]] ], [ false, [[LOOP]] ] ; CHECK-NEXT: [[CALL:%.*]] = call i1 @test3_g(%t1* [[TMP1]], i32 0) ; CHECK-NEXT: call void @use.1(i1 false) -; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll b/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll index 7596a56b8122..17b37f000407 100644 --- a/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll +++ b/llvm/test/Transforms/SCCP/switch-constantfold-crash.ll @@ -5,11 +5,11 @@ define void @barney() { ; CHECK-LABEL: @barney( ; CHECK-NEXT: bb: -; CHECK-NEXT: br label %bb9 +; CHECK-NEXT: br label [[BB9:%.*]] ; CHECK: bb6: ; CHECK-NEXT: unreachable ; CHECK: bb9: -; CHECK-NEXT: unreachable +; CHECK-NEXT: br label [[BB6:%.*]] ; bb: br label %bb9 @@ -29,9 +29,9 @@ bb9: ; preds = %bb define void @blam() { ; CHECK-LABEL: @blam( ; CHECK-NEXT: bb: -; CHECK-NEXT: br label %bb16 +; CHECK-NEXT: br label [[BB16:%.*]] ; CHECK: bb16: -; CHECK-NEXT: br label %bb38 +; CHECK-NEXT: br label [[BB38:%.*]] ; CHECK: bb38: ; CHECK-NEXT: unreachable ; @@ -62,9 +62,9 @@ bb38: ; preds = %bb16 define void @hoge() { ; CHECK-LABEL: @hoge( ; CHECK-NEXT: bb: -; CHECK-NEXT: br label %bb2 +; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: unreachable +; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/SCCP/switch.ll b/llvm/test/Transforms/SCCP/switch.ll index d895c5629cd4..aae8617a31dd 100644 --- a/llvm/test/Transforms/SCCP/switch.ll +++ b/llvm/test/Transforms/SCCP/switch.ll @@ -23,15 +23,11 @@ define i32 @test_duplicate_successors_phi(i1 %c, i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[SWITCH:%.*]], label [[END:%.*]] ; CHECK: switch: -; CHECK-NEXT: switch i32 -1, label [[SWITCH_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[END]] -; CHECK-NEXT: i32 1, label [[END]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[SWITCH_DEFAULT:%.*]] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ 1, [[SWITCH]] ], [ 1, [[SWITCH]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; entry: br i1 %c, label %switch, label %end diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll index 2703bdb27dff..23a88c35a93e 100644 --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -216,11 +216,11 @@ define void @rotated_loop_2(i32 %x) { ; IPSCCP: bb3: ; IPSCCP-NEXT: br label [[EXIT]] ; IPSCCP: exit: -; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 5, [[BB3]] ], [ [[A:%.*]], [[EXIT]] ] -; IPSCCP-NEXT: [[A]] = add i32 [[P]], 1 +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 5, [[BB3]] ] +; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 ; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: call void @use(i1 false) -; IPSCCP-NEXT: br i1 false, label [[EXIT]], label [[EXIT_1:%.*]] +; IPSCCP-NEXT: br label [[EXIT_1:%.*]] ; IPSCCP: exit.1: ; IPSCCP-NEXT: ret void ; @@ -451,10 +451,10 @@ define void @foo(i64* %arg) { ; SCCP-NEXT: [[TMP7:%.*]] = sub i64 3, [[TMP6]] ; SCCP-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 ; SCCP-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 -; SCCP-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +; SCCP-NEXT: [[TMP0:%.*]] = zext i32 [[TMP9]] to i64 ; SCCP-NEXT: br label [[BB11:%.*]] ; SCCP: bb11: -; SCCP-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP10]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ] +; SCCP-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP0]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ] ; SCCP-NEXT: br label [[BB13:%.*]] ; SCCP: bb13: ; SCCP-NEXT: [[C_1:%.*]] = icmp eq i64 [[TMP12]], 6 @@ -489,10 +489,10 @@ define void @foo(i64* %arg) { ; IPSCCP-NEXT: [[TMP7:%.*]] = sub i64 3, [[TMP6]] ; IPSCCP-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 ; IPSCCP-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 -; IPSCCP-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +; IPSCCP-NEXT: [[TMP0:%.*]] = zext i32 [[TMP9]] to i64 ; IPSCCP-NEXT: br label [[BB11:%.*]] ; IPSCCP: bb11: -; IPSCCP-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP10]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ] +; IPSCCP-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP0]], [[BB4]] ], [ [[TMP17:%.*]], [[BB18:%.*]] ] ; IPSCCP-NEXT: br label [[BB13:%.*]] ; IPSCCP: bb13: ; IPSCCP-NEXT: [[C_1:%.*]] = icmp eq i64 [[TMP12]], 6