forked from OSchip/llvm-project
[WebAssembly] Optimize the number of routing blocks in FixIrreducibleCFG
Summary: Currently we create a routing block to the dispatch block for every predecessor of every entry. So the total number of routing blocks created will be (# of preds) * (# of entries). But we don't need to do this: we need at most 2 routing blocks per loop entry, one for when the predecessor is inside the loop and one for when it is outside the loop. (We can't merge these into one because this would create another loop cycle between blocks inside and blocks outside.) This patch fixes this and creates at most 2 routing blocks per entry. This also renames variable `Split` to `Routing`, which I think is a bit clearer. Reviewers: kripken Subscribers: sunfish, dschuff, sbc100, jgravelle-google, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59462 llvm-svn: 357337
This commit is contained in:
parent
916709e0be
commit
7e7aad1510
|
@ -230,7 +230,7 @@ class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
|
||||||
MachineFunction &MF);
|
MachineFunction &MF);
|
||||||
|
|
||||||
void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
|
void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
|
||||||
MachineFunction &MF);
|
MachineFunction &MF, const ReachabilityGraph &Graph);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static char ID; // Pass identification, replacement for typeid
|
static char ID; // Pass identification, replacement for typeid
|
||||||
|
@ -279,7 +279,7 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (MutualLoopEntries.size() > 1) {
|
if (MutualLoopEntries.size() > 1) {
|
||||||
makeSingleEntryLoop(MutualLoopEntries, Blocks, MF);
|
makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
|
||||||
FoundIrreducibility = true;
|
FoundIrreducibility = true;
|
||||||
Changed = true;
|
Changed = true;
|
||||||
break;
|
break;
|
||||||
|
@ -315,9 +315,12 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion(
|
||||||
// Given a set of entries to a single loop, create a single entry for that
|
// Given a set of entries to a single loop, create a single entry for that
|
||||||
// loop by creating a dispatch block for them, routing control flow using
|
// loop by creating a dispatch block for them, routing control flow using
|
||||||
// a helper variable. Also updates Blocks with any new blocks created, so
|
// a helper variable. Also updates Blocks with any new blocks created, so
|
||||||
// that we properly track all the blocks in the region.
|
// that we properly track all the blocks in the region. But this does not update
|
||||||
|
// ReachabilityGraph; this will be updated in the caller of this function as
|
||||||
|
// needed.
|
||||||
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
|
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
|
||||||
BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF) {
|
BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
|
||||||
|
const ReachabilityGraph &Graph) {
|
||||||
assert(Entries.size() >= 2);
|
assert(Entries.size() >= 2);
|
||||||
|
|
||||||
// Sort the entries to ensure a deterministic build.
|
// Sort the entries to ensure a deterministic build.
|
||||||
|
@ -385,36 +388,78 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (MachineBasicBlock *Pred : AllPreds) {
|
// This set stores predecessors within this loop.
|
||||||
DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
|
DenseSet<MachineBasicBlock *> InLoop;
|
||||||
|
for (auto *Pred : AllPreds) {
|
||||||
for (auto *Entry : Pred->successors()) {
|
for (auto *Entry : Pred->successors()) {
|
||||||
if (!Entries.count(Entry)) {
|
if (!Entries.count(Entry))
|
||||||
continue;
|
continue;
|
||||||
|
if (Graph.canReach(Entry, Pred)) {
|
||||||
|
InLoop.insert(Pred);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Record if each entry has a layout predecessor. This map stores
|
||||||
|
// <<Predecessor is within the loop?, loop entry>, layout predecessor>
|
||||||
|
std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
|
||||||
|
EntryToLayoutPred;
|
||||||
|
for (auto *Pred : AllPreds)
|
||||||
|
for (auto *Entry : Pred->successors())
|
||||||
|
if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
|
||||||
|
EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
|
||||||
|
|
||||||
|
// We need to create at most two routing blocks per entry: one for
|
||||||
|
// predecessors outside the loop and one for predecessors inside the loop.
|
||||||
|
// This map stores
|
||||||
|
// <<Predecessor is within the loop?, loop entry>, routing block>
|
||||||
|
std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
|
||||||
|
for (auto *Pred : AllPreds) {
|
||||||
|
bool PredInLoop = InLoop.count(Pred);
|
||||||
|
for (auto *Entry : Pred->successors()) {
|
||||||
|
if (!Entries.count(Entry) ||
|
||||||
|
Map.count(std::make_pair(InLoop.count(Pred), Entry)))
|
||||||
|
continue;
|
||||||
|
// If there exists a layout predecessor of this entry and this predecessor
|
||||||
|
// is not that, we rather create a routing block after that layout
|
||||||
|
// predecessor to save a branch.
|
||||||
|
if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
|
||||||
|
EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
|
||||||
|
continue;
|
||||||
|
|
||||||
// This is a successor we need to rewrite.
|
// This is a successor we need to rewrite.
|
||||||
MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
|
MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
|
||||||
MF.insert(Pred->isLayoutSuccessor(Entry)
|
MF.insert(Pred->isLayoutSuccessor(Entry)
|
||||||
? MachineFunction::iterator(Entry)
|
? MachineFunction::iterator(Entry)
|
||||||
: MF.end(),
|
: MF.end(),
|
||||||
Split);
|
Routing);
|
||||||
Blocks.insert(Split);
|
Blocks.insert(Routing);
|
||||||
|
|
||||||
// Set the jump table's register of the index of the block we wish to
|
// Set the jump table's register of the index of the block we wish to
|
||||||
// jump to, and jump to the jump table.
|
// jump to, and jump to the jump table.
|
||||||
BuildMI(Split, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
|
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
|
||||||
.addImm(Indices[Entry]);
|
.addImm(Indices[Entry]);
|
||||||
BuildMI(Split, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
|
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
|
||||||
Split->addSuccessor(Dispatch);
|
Routing->addSuccessor(Dispatch);
|
||||||
Map[Entry] = Split;
|
Map[std::make_pair(PredInLoop, Entry)] = Routing;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto *Pred : AllPreds) {
|
||||||
|
bool PredInLoop = InLoop.count(Pred);
|
||||||
// Remap the terminator operands and the successor list.
|
// Remap the terminator operands and the successor list.
|
||||||
for (MachineInstr &Term : Pred->terminators())
|
for (MachineInstr &Term : Pred->terminators())
|
||||||
for (auto &Op : Term.explicit_uses())
|
for (auto &Op : Term.explicit_uses())
|
||||||
if (Op.isMBB() && Indices.count(Op.getMBB()))
|
if (Op.isMBB() && Indices.count(Op.getMBB()))
|
||||||
Op.setMBB(Map[Op.getMBB()]);
|
Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
|
||||||
for (auto Rewrite : Map)
|
|
||||||
Pred->replaceSuccessor(Rewrite.first, Rewrite.second);
|
for (auto *Succ : Pred->successors()) {
|
||||||
|
if (!Entries.count(Succ))
|
||||||
|
continue;
|
||||||
|
auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
|
||||||
|
Pred->replaceSuccessor(Succ, Routing);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a fake default label, because br_table requires one.
|
// Create a fake default label, because br_table requires one.
|
||||||
|
|
|
@ -93,12 +93,18 @@ bb19: ; preds = %bb6
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; A simple loop 2 blocks that are both entries.
|
; A simple loop with 2 blocks that are both entries: A1 and A2.
|
||||||
|
; Even though A1 and A2 both have 3 predecessors (A0, A1, and A2), not 6 but
|
||||||
|
; only 4 new routing blocks to the dispatch block should be generated.
|
||||||
|
|
||||||
; CHECK-LABEL: test2:
|
; CHECK-LABEL: test2:
|
||||||
; CHECK: br_if
|
; CHECK: br_if
|
||||||
; CHECK: i32.const $[[REG:[^,]+]]=
|
; CHECK: i32.const $[[REG:[^,]+]]=
|
||||||
|
; CHECK: i32.const $[[REG]]=
|
||||||
; CHECK: br_table $[[REG]],
|
; CHECK: br_table $[[REG]],
|
||||||
|
; CHECK: i32.const $[[REG]]=
|
||||||
|
; CHECK: i32.const $[[REG]]=
|
||||||
|
; CHECK-NOT: i32.const $[[REG]]=
|
||||||
define i32 @test2(i32) {
|
define i32 @test2(i32) {
|
||||||
entry:
|
entry:
|
||||||
br label %A0
|
br label %A0
|
||||||
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
# RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-fix-irreducible-control-flow %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
# This tests if we correctly create at most 2 routing blocks per entry block,
|
||||||
|
# and also whether those routing blocks are generated in the correct place. If
|
||||||
|
# one of the predecessors is the layout predecessor of an entry, a routing block
|
||||||
|
# for the entry should be generated right after the layout predecessor.
|
||||||
|
|
||||||
|
--- |
|
||||||
|
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
||||||
|
target triple = "wasm32-unknown-unknown"
|
||||||
|
|
||||||
|
define void @test0() {
|
||||||
|
pred0:
|
||||||
|
ret void
|
||||||
|
pred1:
|
||||||
|
ret void
|
||||||
|
entry0:
|
||||||
|
ret void
|
||||||
|
entry1:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
# CHECK-LABEL: test0
|
||||||
|
name: test0
|
||||||
|
liveins:
|
||||||
|
- { reg: '$arguments' }
|
||||||
|
body: |
|
||||||
|
bb.0.pred0:
|
||||||
|
successors: %bb.1, %bb.2
|
||||||
|
liveins: $arguments
|
||||||
|
%0:i32 = CONST_I32 100, implicit-def $arguments
|
||||||
|
BR_IF %bb.2, %0:i32, implicit-def $arguments
|
||||||
|
; CHECK: bb.0.pred0:
|
||||||
|
; CHECK: BR_IF %bb.2, %0, implicit-def $arguments
|
||||||
|
|
||||||
|
bb.1.pred1:
|
||||||
|
; predecessors: %bb.0
|
||||||
|
successors: %bb.2, %bb.3
|
||||||
|
BR_IF %bb.3, %0:i32, implicit-def $arguments
|
||||||
|
; CHECK: bb.1.pred1:
|
||||||
|
; CHECK: BR_IF %bb.7, %0, implicit-def $arguments
|
||||||
|
; This falls through to bb.2, so we don't need an additional BR here
|
||||||
|
; CHECK-NOT: BR
|
||||||
|
|
||||||
|
; Routing block for entry0, when predecessor is outside the loop
|
||||||
|
; This routing block is shared between the two predecessors: pred0 and pred1.
|
||||||
|
; CHECK: bb.2:
|
||||||
|
; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments
|
||||||
|
; CHECK: BR %bb.6, implicit-def $arguments
|
||||||
|
|
||||||
|
bb.2.entry0:
|
||||||
|
; predecessors: %bb.0, %bb.1, %bb.3
|
||||||
|
successors: %bb.3
|
||||||
|
BR %bb.3, implicit-def $arguments
|
||||||
|
; CHECK: bb.3.entry0:
|
||||||
|
; CHECK: BR %bb.4, implicit-def $arguments
|
||||||
|
|
||||||
|
; Routing block for entry1, when predecessor is inside the loop
|
||||||
|
; CHECK: bb.4:
|
||||||
|
; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments
|
||||||
|
; CHECK: BR %bb.6, implicit-def $arguments
|
||||||
|
|
||||||
|
bb.3.entry1:
|
||||||
|
; predecessors: %bb.1, %bb.2
|
||||||
|
successors: %bb.2
|
||||||
|
BR %bb.2, implicit-def $arguments
|
||||||
|
; CHECK: bb.5.entry1:
|
||||||
|
; CHECK: BR %bb.8, implicit-def $arguments
|
||||||
|
|
||||||
|
; Dispatch block
|
||||||
|
; CHECK: bb.6:
|
||||||
|
; CHECK: BR_TABLE_I32 %1, %bb.3, %bb.5, %bb.5, implicit-def $arguments
|
||||||
|
|
||||||
|
; Routing block for entry1, when predecessor is outside the loop
|
||||||
|
; CHECK: bb.7:
|
||||||
|
; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments
|
||||||
|
; CHECK: BR %bb.6, implicit-def $arguments
|
||||||
|
|
||||||
|
; Routing block for entry0, when predecessor is inside the loop
|
||||||
|
; CHECK: bb.8:
|
||||||
|
; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments
|
||||||
|
; CHECK: BR %bb.6, implicit-def $arguments
|
Loading…
Reference in New Issue