From 7e8be28661b66e1c0358f1bab199ea1e377ef024 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Mon, 10 Apr 2017 22:28:22 +0000 Subject: [PATCH] CodeGen: BlockPlacement: Don't always tail-duplicate with no other successor. The math works out where it can actually be counter-productive. The probability calculations correctly handle the case where the alternative is 0 probability, rely on those calculations. Includes a test case that demonstrates the problem. llvm-svn: 299892 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 6 +-- .../X86/tail-dup-merge-loop-headers.ll | 4 +- .../X86/tail-dup-no-other-successor.ll | 53 +++++++++++++++++++ 3 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/X86/tail-dup-no-other-successor.ll diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index c03ffdd3732d..ffd88b7c8e30 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1489,11 +1489,7 @@ MachineBlockPlacement::selectBestSuccessor( if (DupProb < BestProb) break; if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) - // If tail duplication gives us fallthrough when we otherwise wouldn't - // have it, that is a strict gain. - && (BestSucc.BB == nullptr - || isProfitableToTailDup(BB, Succ, BestProb, Chain, - BlockFilter))) { + && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { DEBUG( dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " << DupProb diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll index 80346018ff80..197fd72586a5 100644 --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -6,13 +6,13 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: tail_dup_merge_loops ; CHECK: # %entry ; CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %exit -; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_exit ; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_latch ; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_test +; CHECK-NOT: # %{{[a-zA-Z_]+}} +; CHECK: # %exit define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 { entry: %notlhs674.i = icmp eq i32 %a, 0 diff --git a/llvm/test/CodeGen/X86/tail-dup-no-other-successor.ll b/llvm/test/CodeGen/X86/tail-dup-no-other-successor.ll new file mode 100644 index 000000000000..6fa6f94e6530 --- /dev/null +++ b/llvm/test/CodeGen/X86/tail-dup-no-other-successor.ll @@ -0,0 +1,53 @@ +; RUN: llc -O3 -o - %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @effect(i32); + +; After the loop gets laid out, loop.end is the only successor, but can't be +; laid out because of the CFG dependency from top.fakephi. The calculations show +; that it isn't profitable to tail-duplicate in this case, because of the +; effects on fallthrough from %loop.end +; CHECK-LABEL: {{^}}no_successor_still_no_taildup: +; CHECK: %entry +; CHECK: %loop.top +; CHECK: %loop.latch +; CHECK: %top.fakephi +; CHECK: %loop.end +; CHECK: %false +; CHECK: %ret +define void @no_successor_still_no_taildup (i32 %count, i32 %key) { +entry: + br label %loop.top + +loop.top: + %i.loop.top = phi i32 [ %count, %entry ], [ %i.latch, %loop.latch ] + %cmp.top = icmp eq i32 %i.loop.top, %key + call void @effect(i32 0) + br i1 %cmp.top, label %top.fakephi, label %loop.latch, !prof !1 + +loop.latch: + %i.latch = sub i32 %i.loop.top, 1 + %cmp.latch = icmp eq i32 %i.latch, 0 + call void @effect(i32 1) + br i1 %cmp.top, label %loop.top, label %loop.end, !prof !2 + +top.fakephi: + call void @effect(i32 2) + br label %loop.end + +loop.end: + %cmp.end = icmp eq i32 %count, 0 + br i1 %cmp.end, label %ret, label %false, !prof !3 + +false: + call void @effect(i32 4) + br label %ret + +ret: + ret void +} + +!1 = !{!"branch_weights", i32 1, i32 1} +!2 = !{!"branch_weights", i32 5, i32 1} +!3 = !{!"branch_weights", i32 1, i32 2}