Distribute the weight on the edge from switch to default statement to edges generated in lowering switch.

Currently, when edge weights are assigned to edges that are created when lowering switch statement, the weight on the edge to default statement (let's call it "default weight" here) is not considered. We need to distribute this weight properly. However, without value profiling, we have no idea how to distribute it. In this patch, I applied the heuristic that this weight is evenly distributed to successors.

For example, given a switch statement with cases 1,2,3,5,10,11,20, and every edge from switch to each successor has weight 10. If there is a binary search tree built to test if n < 10, then its two out-edges will have weight 4x10+10/2 = 45 and 3x10 + 10/2 = 35 respectively (currently they are 40 and 30 without considering the default weight). Each distribution (which is 5 here) will be stored in each SwitchWorkListItem for further distribution.

There are some exceptions:

For a jump table header which doesn't have any edge to default statement, we don't distribute the default weight to it.
For a bit test header which covers a contiguous range and hence has no edges to default statement, we don't distribute the default weight to it.
When the branch checks a single value or a contiguous range with no edge to default statement, we don't distribute the default weight to it.
In other cases, the default weight is evenly distributed to successors.

Differential Revision: http://reviews.llvm.org/D12418

llvm-svn: 246522
This commit is contained in:
Cong Hou 2015-09-01 01:42:16 +00:00
parent 7fa450c6ae
commit 511298b919
13 changed files with 264 additions and 51 deletions

View File

@ -1924,8 +1924,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
uint32_t DefaultWeight = getEdgeWeight(SwitchBB, B.Default);
addSuccessorWithWeight(SwitchBB, B.Default, DefaultWeight);
addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight);
addSuccessorWithWeight(SwitchBB, MBB, B.Weight);
SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
@ -8037,7 +8036,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
// Compute total weight.
uint32_t UnhandledWeights = 0;
uint32_t DefaultWeight = W.DefaultWeight;
uint32_t UnhandledWeights = DefaultWeight;
for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
UnhandledWeights += I->Weight;
assert(UnhandledWeights >= I->Weight && "Weight overflow!");
@ -8067,14 +8067,24 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
// Collect the sum of weights of outgoing edges from JumpMBB, which will
// be the edge weight on CurMBB->JumpMBB.
uint32_t JumpWeight = 0;
for (auto Succ : JumpMBB->successors())
JumpWeight += getEdgeWeight(JumpMBB, Succ);
uint32_t FallthruWeight = getEdgeWeight(CurMBB, Fallthrough);
uint32_t JumpWeight = I->Weight;
uint32_t FallthroughWeight = UnhandledWeights;
addSuccessorWithWeight(CurMBB, Fallthrough, FallthruWeight);
// If Fallthrough is a target of the jump table, we evenly distribute
// the weight on the edge to Fallthrough to successors of CurMBB.
// Also update the weight on the edge from JumpMBB to Fallthrough.
for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
SE = JumpMBB->succ_end();
SI != SE; ++SI) {
if (*SI == Fallthrough) {
JumpWeight += DefaultWeight / 2;
FallthroughWeight -= DefaultWeight / 2;
JumpMBB->setSuccWeight(SI, DefaultWeight / 2);
break;
}
}
addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight);
addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight);
// The jump table header will be inserted in our current block, do the
@ -8101,8 +8111,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
// If we're in the right place, emit the bit test header header right now.
if (CurMBB ==SwitchMBB) {
BTB->DefaultWeight = UnhandledWeights;
// If the cases in bit test don't form a contiguous range, we evenly
// distribute the weight on the edge to Fallthrough to two successors
// of CurMBB.
if (!BTB->ContiguousRange) {
BTB->Weight += DefaultWeight / 2;
BTB->DefaultWeight -= DefaultWeight / 2;
}
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
@ -8167,8 +8186,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
uint32_t LeftWeight = LastLeft->Weight;
uint32_t RightWeight = FirstRight->Weight;
uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2;
uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
// find a partitioning of the clusters which balances the weight on both
@ -8254,7 +8273,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
WorkList.push_back(
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
@ -8269,7 +8289,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
WorkList.push_back(
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
@ -8370,7 +8391,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB);
WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();

View File

@ -288,7 +288,7 @@ private:
BitTestInfo C, uint32_t W)
: First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)),
Weight(W) {}
Weight(W), DefaultWeight(0) {}
APInt First;
APInt Range;
const Value *SValue;
@ -300,6 +300,7 @@ private:
MachineBasicBlock *Default;
BitTestInfo Cases;
uint32_t Weight;
uint32_t DefaultWeight;
};
/// Minimum jump table density, in percent.
@ -341,6 +342,7 @@ private:
CaseClusterIt LastCluster;
const ConstantInt *GE;
const ConstantInt *LT;
uint32_t DefaultWeight;
};
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;

View File

@ -1490,9 +1490,7 @@ SelectionDAGISel::FinishBasicBlock() {
CodeGenAndEmitDAG();
}
uint32_t UnhandledWeight = 0;
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
uint32_t UnhandledWeight = SDB->BitTestCases[i].Weight;
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;

View File

@ -7,9 +7,9 @@
;
; CHECK: // %if.then.120
;
; REGULAR: str w22, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
; Check that w22 wouldn't need to be spilled since it is never reused.
; REGULAR-NOT: {{[wx]}}22{{,?}}
; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
; Check that w21 wouldn't need to be spilled since it is never reused.
; REGULAR-NOT: {{[wx]}}21{{,?}}
;
; Check that w22 is used to carry a value through the call.
; DEFERRED-NOT: str {{[wx]}}22,
@ -22,8 +22,8 @@
; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
; DEFERRED-NOT: ldr {{[wx]}}22,
;
; REGULAR-NOT: {{[wx]}}22{{,?}}
; REGUAL: ldr w22, [sp, #[[OFFSET]]] // 4-byte Folded Reload
; REGULAR-NOT: {{[wx]}}21{{,?}}
; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
;
; End of the basic block we are interested in.
; CHECK: b

View File

@ -13,7 +13,7 @@ define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]
; CHECK: [[SKIP_TABLE]]:
; CHECK: add pc, lr, {{r[0-9]+}}
; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}}
br i1 %tst, label %simple, label %complex
simple:

View File

@ -16,9 +16,9 @@ entry:
i64 5, label %sw.bb1
], !prof !0
; CHECK: BB#0: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
; CHECK: Successors according to CFG: BB#2(64) BB#4(21)
; CHECK: BB#4: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
; CHECK: Successors according to CFG: BB#1(10) BB#5(11)
; CHECK: BB#5: derived from LLVM BB %entry
; CHECK: Successors according to CFG: BB#1(4) BB#3(7)

View File

@ -35,7 +35,7 @@ bb:
br i1 %tmp4, label %bb1, label %bb8
bb1:
; CHECK: %bb6
; CHECK: %entry
; CHECK: it eq
; CHECK-NEXT: ldreq
; CHECK-NEXT: it eq
@ -54,8 +54,9 @@ bb1:
bb4:
; CHECK-PIC: cmp
; CHECK-PIC: cmp
; CHECK-PIC: cmp
; CHECK-PIC-NEXT: bne
; CHECK-PIC-NEXT: %bb4
; CHECK-PIC: %bb6
; CHECK-PIC-NEXT: movs
; CHECK-PIC-NEXT: add
; CHECK-PIC-NEXT: pop

View File

@ -7,9 +7,11 @@
; CHECK-NEXT: %if.else163
; CHECK-NEXT: mov.w
; CHECK-NEXT: b
; CHECK: [[JUMPTARGET]]:{{.*}}%if.else173
; CHECK-NEXT: mov.w
; CHECK-NEXT: pop
; CHECK-NEXT: %if.else145
; CHECK-NEXT: mov.w
; CHECK: [[JUMPTARGET]]:{{.*}}%if.else173
%struct.hc = type { i32, i32, i32, i32 }

View File

@ -6,8 +6,8 @@
define i32 @test(i32 %argc, i8** %argv) nounwind {
entry:
; CHECK: cmpl $2
; CHECK-NEXT: jne
; CHECK-NEXT: %bb2
; CHECK-NEXT: je
; CHECK-NEXT: %entry
switch i32 %argc, label %UnifiedReturnBlock [
i32 1, label %bb

View File

@ -5,7 +5,7 @@
; CHECK: movabsq $2305843009482129440, %r
; CHECK-NEXT: btq %rax, %r
; CHECK-NEXT: jae
; CHECK-NEXT: jb
; CHECK: movl $671088640, %e
; CHECK-NEXT: btq %rax, %r
; CHECK-NEXT: jae
@ -145,13 +145,13 @@ sw.epilog:
; CHECK: cmpl $10
; CHECK: je
; CHECK: cmpl $20
; CHECK: je
; CHECK: cmpl $30
; CHECK: jne
; CHECK: cmpl $40
; CHECK: je
; CHECK: cmpl $50
; CHECK: jne
; CHECK: cmpl $30
; CHECK: jne
; CHECK: je
; CHECK: cmpl $60
; CHECK: jne
}

View File

@ -1,19 +1,20 @@
; RUN: llc -march=x86-64 -print-machineinstrs=expand-isel-pseudos %s -o /dev/null 2>&1 | FileCheck %s
declare void @foo(i32)
; CHECK-LABEL: test
define void @test(i32 %x) nounwind {
entry:
switch i32 %x, label %sw.default [
i32 54, label %sw.bb
i32 55, label %sw.bb
i32 56, label %sw.bb
i32 58, label %sw.bb
i32 67, label %sw.bb
i32 68, label %sw.bb
i32 134, label %sw.bb
i32 140, label %sw.bb
i32 1, label %sw.bb
i32 155, label %sw.bb
i32 156, label %sw.bb
i32 157, label %sw.bb
i32 158, label %sw.bb
i32 159, label %sw.bb
i32 1134, label %sw.bb
i32 1140, label %sw.bb
], !prof !1
sw.bb:
@ -30,12 +31,199 @@ sw.epilog:
; Check if weights are correctly assigned to edges generated from switch
; statement.
;
; CHECK: BB#0:
; BB#0 to BB#4: [0, 1133] (65 = 60 + 5)
; BB#0 to BB#5: [1134, UINT32_MAX] (25 = 20 + 5)
; CHECK: Successors according to CFG: BB#4(65) BB#5(25)
;
; CHECK: BB#4:
; CHECK: Successors according to CFG: BB#1(10) BB#6(10)
; BB#4 to BB#1: [155, 159] (50)
; BB#4 to BB#5: [0, 1133] - [155, 159] (15 = 10 + 5)
; CHECK: Successors according to CFG: BB#1(50) BB#7(15)
;
; CHECK: BB#5:
; BB#5 to BB#1: {1140} (10)
; BB#5 to BB#6: [1134, UINT32_MAX] - {1140} (15 = 10 + 5)
; CHECK: Successors according to CFG: BB#1(10) BB#6(15)
;
; CHECK: BB#6:
; CHECK: Successors according to CFG: BB#1(10) BB#2(10)
; BB#6 to BB#1: {1134} (10)
; BB#6 to BB#2: [1134, UINT32_MAX] - {1134, 1140} (5)
; CHECK: Successors according to CFG: BB#1(10) BB#2(5)
}
declare void @foo(i32)
; CHECK-LABEL: test2
define void @test2(i32 %x) nounwind {
entry:
; In this switch statement, there is an edge from jump table to default
; statement.
switch i32 %x, label %sw.default [
i32 1, label %sw.bb
i32 10, label %sw.bb2
i32 11, label %sw.bb3
i32 12, label %sw.bb4
i32 13, label %sw.bb5
i32 14, label %sw.bb5
], !prof !3
sw.bb:
call void @foo(i32 0)
br label %sw.epilog
sw.bb2:
call void @foo(i32 2)
br label %sw.epilog
sw.bb3:
call void @foo(i32 3)
br label %sw.epilog
sw.bb4:
call void @foo(i32 4)
br label %sw.epilog
sw.bb5:
call void @foo(i32 5)
br label %sw.epilog
sw.default:
call void @foo(i32 1)
br label %sw.epilog
sw.epilog:
ret void
; Check if weights are correctly assigned to edges generated from switch
; statement.
;
; CHECK: BB#0:
; BB#0 to BB#6: {0} + [15, UINT32_MAX] (5)
; BB#0 to BB#8: [1, 14] (jump table) (65 = 60 + 5)
; CHECK: Successors according to CFG: BB#6(5) BB#8(65)
;
; CHECK: BB#8:
; BB#8 to BB#1: {1} (10)
; BB#8 to BB#6: [2, 9] (5)
; BB#8 to BB#2: {10} (10)
; BB#8 to BB#3: {11} (10)
; BB#8 to BB#4: {12} (10)
; BB#8 to BB#5: {13, 14} (20)
; CHECK: Successors according to CFG: BB#1(10) BB#6(5) BB#2(10) BB#3(10) BB#4(10) BB#5(20)
}
; CHECK-LABEL: test3
define void @test3(i32 %x) nounwind {
entry:
; In this switch statement, there is no edge from jump table to default
; statement.
switch i32 %x, label %sw.default [
i32 10, label %sw.bb
i32 11, label %sw.bb2
i32 12, label %sw.bb3
i32 13, label %sw.bb4
i32 14, label %sw.bb5
], !prof !2
sw.bb:
call void @foo(i32 0)
br label %sw.epilog
sw.bb2:
call void @foo(i32 2)
br label %sw.epilog
sw.bb3:
call void @foo(i32 3)
br label %sw.epilog
sw.bb4:
call void @foo(i32 4)
br label %sw.epilog
sw.bb5:
call void @foo(i32 5)
br label %sw.epilog
sw.default:
call void @foo(i32 1)
br label %sw.epilog
sw.epilog:
ret void
; Check if weights are correctly assigned to edges generated from switch
; statement.
;
; CHECK: BB#0:
; BB#0 to BB#6: [0, 9] + [15, UINT32_MAX] {10}
; BB#0 to BB#8: [10, 14] (jump table) (50)
; CHECK: Successors according to CFG: BB#6(10) BB#8(50)
;
; CHECK: BB#8:
; BB#8 to BB#1: {10} (10)
; BB#8 to BB#2: {11} (10)
; BB#8 to BB#3: {12} (10)
; BB#8 to BB#4: {13} (10)
; BB#8 to BB#5: {14} (10)
; CHECK: Successors according to CFG: BB#1(10) BB#2(10) BB#3(10) BB#4(10) BB#5(10)
}
; CHECK-LABEL: test4
define void @test4(i32 %x) nounwind {
entry:
; In this switch statement, there is no edge from bit test to default basic
; block.
switch i32 %x, label %sw.default [
i32 1, label %sw.bb
i32 111, label %sw.bb2
i32 112, label %sw.bb3
i32 113, label %sw.bb3
i32 114, label %sw.bb2
i32 115, label %sw.bb2
], !prof !3
sw.bb:
call void @foo(i32 0)
br label %sw.epilog
sw.bb2:
call void @foo(i32 2)
br label %sw.epilog
sw.bb3:
call void @foo(i32 3)
br label %sw.epilog
sw.default:
call void @foo(i32 1)
br label %sw.epilog
sw.epilog:
ret void
; Check if weights are correctly assigned to edges generated from switch
; statement.
;
; CHECK: BB#0:
; BB#0 to BB#6: [0, 110] + [116, UINT32_MAX] (20)
; BB#0 to BB#7: [111, 115] (bit test) (50)
; CHECK: Successors according to CFG: BB#6(20) BB#7(50)
;
; CHECK: BB#7:
; BB#7 to BB#2: {111, 114, 115} (30)
; BB#7 to BB#3: {112, 113} (20)
; CHECK: Successors according to CFG: BB#2(30) BB#3(20)
}
!1 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10}
!2 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10}
!3 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10}

View File

@ -13,8 +13,8 @@ entry:
; CHECK-LABEL: test1:
; CHECK-NOT: unr
; CHECK: cmpl $10
; CHECK: bar
; CHECK: cmpl $20
; CHECK: bar
if.then:
tail call void @unr(i32 23) noreturn nounwind

View File

@ -51,7 +51,7 @@ return: ret void
; CHECK-LABEL: simple_ranges
; CHECK: leal -100
; CHECK: cmpl $4
; CHECK: jae
; CHECK: jb
; CHECK: cmpl $3
; CHECK: ja