forked from OSchip/llvm-project
CodeGen: BlockPlacement: Don't always tail-duplicate with no other successor.
The math works out where it can actually be counter-productive. The probability calculations correctly handle the case where the alternative is 0 probability, rely on those calculations. Includes a test case that demonstrates the problem. llvm-svn: 299892
This commit is contained in:
parent
ee51a20164
commit
7e8be28661
|
@ -1489,11 +1489,7 @@ MachineBlockPlacement::selectBestSuccessor(
|
|||
if (DupProb < BestProb)
|
||||
break;
|
||||
if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
|
||||
// If tail duplication gives us fallthrough when we otherwise wouldn't
|
||||
// have it, that is a strict gain.
|
||||
&& (BestSucc.BB == nullptr
|
||||
|| isProfitableToTailDup(BB, Succ, BestProb, Chain,
|
||||
BlockFilter))) {
|
||||
&& (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
|
||||
DEBUG(
|
||||
dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
|
||||
<< DupProb
|
||||
|
|
|
@ -6,13 +6,13 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; CHECK-LABEL: tail_dup_merge_loops
|
||||
; CHECK: # %entry
|
||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
||||
; CHECK: # %exit
|
||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
||||
; CHECK: # %inner_loop_exit
|
||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
||||
; CHECK: # %inner_loop_latch
|
||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
||||
; CHECK: # %inner_loop_test
|
||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
||||
; CHECK: # %exit
|
||||
define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%notlhs674.i = icmp eq i32 %a, 0
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
; RUN: llc -O3 -o - %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
declare void @effect(i32);
|
||||
|
||||
; After the loop gets laid out, loop.end is the only successor, but can't be
|
||||
; laid out because of the CFG dependency from top.fakephi. The calculations show
|
||||
; that it isn't profitable to tail-duplicate in this case, because of the
|
||||
; effects on fallthrough from %loop.end
|
||||
; CHECK-LABEL: {{^}}no_successor_still_no_taildup:
|
||||
; CHECK: %entry
|
||||
; CHECK: %loop.top
|
||||
; CHECK: %loop.latch
|
||||
; CHECK: %top.fakephi
|
||||
; CHECK: %loop.end
|
||||
; CHECK: %false
|
||||
; CHECK: %ret
|
||||
define void @no_successor_still_no_taildup (i32 %count, i32 %key) {
|
||||
entry:
|
||||
br label %loop.top
|
||||
|
||||
loop.top:
|
||||
%i.loop.top = phi i32 [ %count, %entry ], [ %i.latch, %loop.latch ]
|
||||
%cmp.top = icmp eq i32 %i.loop.top, %key
|
||||
call void @effect(i32 0)
|
||||
br i1 %cmp.top, label %top.fakephi, label %loop.latch, !prof !1
|
||||
|
||||
loop.latch:
|
||||
%i.latch = sub i32 %i.loop.top, 1
|
||||
%cmp.latch = icmp eq i32 %i.latch, 0
|
||||
call void @effect(i32 1)
|
||||
br i1 %cmp.top, label %loop.top, label %loop.end, !prof !2
|
||||
|
||||
top.fakephi:
|
||||
call void @effect(i32 2)
|
||||
br label %loop.end
|
||||
|
||||
loop.end:
|
||||
%cmp.end = icmp eq i32 %count, 0
|
||||
br i1 %cmp.end, label %ret, label %false, !prof !3
|
||||
|
||||
false:
|
||||
call void @effect(i32 4)
|
||||
br label %ret
|
||||
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = !{!"branch_weights", i32 1, i32 1}
|
||||
!2 = !{!"branch_weights", i32 5, i32 1}
|
||||
!3 = !{!"branch_weights", i32 1, i32 2}
|
Loading…
Reference in New Issue