From 41cf1a5dfb499d356398b4bd10e0f874a63bc954 Mon Sep 17 00:00:00 2001
From: Cong Hou <congh@google.com>
Date: Wed, 18 Nov 2015 00:52:52 +0000
Subject: [PATCH] Improving edge probabilities computation when choosing the
 best successor in machine block placement.

When looking for the best successor from the outer loop for a block
belonging to an inner loop, the edge probability computation can be
improved so that edges in the inner loop are ignored. For example,
suppose we are building chains for the non-loop part of the following
code, and looking for B1's best successor. Assume the true body is very
hot, then B3 should be the best candidate. However, because of the
existence of the back edge from B1 to B0, the probability from B1 to B3
can be very small, preventing B3 to be its successor. In this patch, when
computing the probability of the edge from B1 to B3, the weight on the
back edge B1->B0 is ignored, so that B1->B3 will have 100% probability.

if (...)
  do {
    B0;
    ... // some branches
    B1;
  } while(...);
else
  B2;
B3;


Differential revision: http://reviews.llvm.org/D10825

llvm-svn: 253414
---
 llvm/lib/CodeGen/MachineBlockPlacement.cpp    |  60 ++++++---
 ...ode_placement_ignore_succ_in_inner_loop.ll | 123 ++++++++++++++++++
 2 files changed, 168 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll

diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index b12fcf2940ef..fba33eb93d5f 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -389,22 +389,50 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
   uint32_t BestWeight = 0;
   uint32_t WeightScale = 0;
   uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
-  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
-  for (MachineBasicBlock *Succ : BB->successors()) {
-    if (BlockFilter && !BlockFilter->count(Succ))
-      continue;
-    BlockChain &SuccChain = *BlockToChain[Succ];
-    if (&SuccChain == &Chain) {
-      DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Already merged!\n");
-      continue;
-    }
-    if (Succ != *SuccChain.begin()) {
-      DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
-      continue;
-    }
 
+  // Adjust sum of weights by excluding weights on edges pointing to blocks that
+  // is either not in BlockFilter or is already in the current chain. Consider
+  // the following CFG:
+  //
+  //     --->A
+  //     |  / \
+  //     | B   C
+  //     |  \ / \
+  //     ----D   E
+  //
+  // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+  // A->C is chosen as a fall-through, D won't be selected as a successor of C
+  // due to CFG constraint (the probability of C->D is not greater than
+  // HotProb). If we exclude E that is not in BlockFilter when calculating the
+  // probability of C->D, D will be selected and we will get A C D B as the
+  // layout of this loop.
+  uint32_t AdjustedSumWeight = SumWeight;
+  SmallVector<MachineBasicBlock *, 4> Successors;
+  for (MachineBasicBlock *Succ : BB->successors()) {
+    bool SkipSucc = false;
+    if (BlockFilter && !BlockFilter->count(Succ)) {
+      SkipSucc = true;
+    } else {
+      BlockChain *SuccChain = BlockToChain[Succ];
+      if (SuccChain == &Chain) {
+        DEBUG(dbgs() << "    " << getBlockName(Succ)
+                     << " -> Already merged!\n");
+        SkipSucc = true;
+      } else if (Succ != *SuccChain->begin()) {
+        DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
+        continue;
+      }
+    }
+    if (SkipSucc)
+      AdjustedSumWeight -= MBPI->getEdgeWeight(BB, Succ) / WeightScale;
+    else
+      Successors.push_back(Succ);
+  }
+
+  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  for (MachineBasicBlock *Succ : Successors) {
     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
-    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+    BranchProbability SuccProb(SuccWeight / WeightScale, AdjustedSumWeight);
 
     // If we outline optional branches, look whether Succ is unavoidable, i.e.
     // dominates all terminators of the MachineFunction. If it does, other
@@ -432,6 +460,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
 
     // Only consider successors which are either "hot", or wouldn't violate
     // any CFG constraints.
+    BlockChain &SuccChain = *BlockToChain[Succ];
     if (SuccChain.LoopPredecessors != 0) {
       if (SuccProb < HotProb) {
         DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
@@ -441,8 +470,9 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
 
       // Make sure that a hot successor doesn't have a globally more
       // important predecessor.
+      BranchProbability RealSuccProb(SuccWeight / WeightScale, SumWeight);
       BlockFrequency CandidateEdgeFreq =
-          MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+          MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
       bool BadCFGConflict = false;
       for (MachineBasicBlock *Pred : Succ->predecessors()) {
         if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
diff --git a/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll b/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
new file mode 100644
index 000000000000..79b4883fb1d6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+
+define void @foo() {
+; Test that when determining the edge probability from a node in an inner loop
+; to a node in an outer loop, the weights on edges in the inner loop should be
+; ignored if we are building the chain for the outer loop.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %while.body, label %if.end.1, !prof !1
+
+while.body:
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.then.1, label %while.cond
+
+if.then.1:
+  call void @d()
+  br label %while.cond
+
+while.cond:
+  %call3 = call zeroext i1 @a()
+  br i1 %call3, label %while.body, label %if.end
+
+if.end.1:
+  call void @d()
+  br label %if.end
+
+if.else:
+  call void @b()
+  br label %if.end
+
+if.end:
+  call void @c()
+  ret void
+}
+
+define void @bar() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its peer loop, the weights on edges in the first loop should be
+; ignored.
+;
+; CHECK-LABEL: bar:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.then, label %while.body, !prof !2
+
+while.body:
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %while.body, label %if.end, !prof !2
+
+if.else:
+  call void @b()
+  br label %if.end
+
+if.end:
+  call void @c()
+  ret void
+}
+
+define void @par() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its outer loop, the weights on edges in the outer loop should be
+; ignored if we are building the chain for the inner loop.
+;
+; CHECK-LABEL: par:
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq b
+
+entry:
+  br label %if.cond
+
+if.cond:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !3
+
+if.then:
+  call void @b()
+  br label %if.end
+
+if.else:
+  call void @c()
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.end, label %exit, !prof !4
+
+if.end:
+  call void @d()
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.cond, label %if.end.2, !prof !2
+
+if.end.2:
+  call void @e()
+  br label %if.cond
+
+exit:
+  ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+
+!1 = !{!"branch_weights", i32 10, i32 1}
+!2 = !{!"branch_weights", i32 100, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 100}
+!4 = !{!"branch_weights", i32 1, i32 1}