diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index adaf8ee3785e..7d8e86d9b2c0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -230,7 +230,7 @@ class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { MachineFunction &MF); void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks, - MachineFunction &MF); + MachineFunction &MF, const ReachabilityGraph &Graph); public: static char ID; // Pass identification, replacement for typeid @@ -279,7 +279,7 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion( } if (MutualLoopEntries.size() > 1) { - makeSingleEntryLoop(MutualLoopEntries, Blocks, MF); + makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph); FoundIrreducibility = true; Changed = true; break; @@ -315,9 +315,12 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion( // Given a set of entries to a single loop, create a single entry for that // loop by creating a dispatch block for them, routing control flow using // a helper variable. Also updates Blocks with any new blocks created, so -// that we properly track all the blocks in the region. +// that we properly track all the blocks in the region. But this does not update +// ReachabilityGraph; this will be updated in the caller of this function as +// needed. void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( - BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF) { + BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF, + const ReachabilityGraph &Graph) { assert(Entries.size() >= 2); // Sort the entries to ensure a deterministic build. @@ -385,36 +388,78 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( } } - for (MachineBasicBlock *Pred : AllPreds) { - DenseMap Map; + // This set stores predecessors within this loop. 
+ DenseSet InLoop; + for (auto *Pred : AllPreds) { for (auto *Entry : Pred->successors()) { - if (!Entries.count(Entry)) { + if (!Entries.count(Entry)) continue; + if (Graph.canReach(Entry, Pred)) { + InLoop.insert(Pred); + break; } + } + } + + // Record if each entry has a layout predecessor. This map stores + // <<Predecessor is within the loop?, loop entry>, layout predecessor> + std::map, MachineBasicBlock *> + EntryToLayoutPred; + for (auto *Pred : AllPreds) + for (auto *Entry : Pred->successors()) + if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry)) + EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred; + + // We need to create at most two routing blocks per entry: one for + // predecessors outside the loop and one for predecessors inside the loop. + // This map stores + // <<Predecessor is within the loop?, loop entry>, routing block> + std::map, MachineBasicBlock *> Map; + for (auto *Pred : AllPreds) { + bool PredInLoop = InLoop.count(Pred); + for (auto *Entry : Pred->successors()) { + if (!Entries.count(Entry) || + Map.count(std::make_pair(InLoop.count(Pred), Entry))) + continue; + // If there exists a layout predecessor of this entry and this predecessor + // is not that, we would rather create a routing block after that layout + // predecessor to save a branch. + if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) && + EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred) + continue; // This is a successor we need to rewrite. - MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); + MachineBasicBlock *Routing = MF.CreateMachineBasicBlock(); MF.insert(Pred->isLayoutSuccessor(Entry) ? MachineFunction::iterator(Entry) : MF.end(), - Split); - Blocks.insert(Split); + Routing); + Blocks.insert(Routing); // Set the jump table's register of the index of the block we wish to // jump to, and jump to the jump table. 
- BuildMI(Split, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) + BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) .addImm(Indices[Entry]); - BuildMI(Split, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch); - Split->addSuccessor(Dispatch); - Map[Entry] = Split; + BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch); + Routing->addSuccessor(Dispatch); + Map[std::make_pair(PredInLoop, Entry)] = Routing; } + } + + for (auto *Pred : AllPreds) { + bool PredInLoop = InLoop.count(Pred); // Remap the terminator operands and the successor list. for (MachineInstr &Term : Pred->terminators()) for (auto &Op : Term.explicit_uses()) if (Op.isMBB() && Indices.count(Op.getMBB())) - Op.setMBB(Map[Op.getMBB()]); - for (auto Rewrite : Map) - Pred->replaceSuccessor(Rewrite.first, Rewrite.second); + Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]); + + for (auto *Succ : Pred->successors()) { + if (!Entries.count(Succ)) + continue; + auto *Routing = Map[std::make_pair(PredInLoop, Succ)]; + Pred->replaceSuccessor(Succ, Routing); + } } // Create a fake default label, because br_table requires one. diff --git a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.ll b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.ll index 651c113fe7c8..00f396f947ad 100644 --- a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.ll +++ b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.ll @@ -93,12 +93,18 @@ bb19: ; preds = %bb6 ret void } -; A simple loop 2 blocks that are both entries. +; A simple loop with 2 blocks that are both entries: A1 and A2. +; Even though A1 and A2 both have 3 predecessors (A0, A1, and A2), not 6 but +; only 4 new routing blocks to the dispatch block should be generated. 
; CHECK-LABEL: test2: ; CHECK: br_if ; CHECK: i32.const $[[REG:[^,]+]]= +; CHECK: i32.const $[[REG]]= ; CHECK: br_table $[[REG]], +; CHECK: i32.const $[[REG]]= +; CHECK: i32.const $[[REG]]= +; CHECK-NOT: i32.const $[[REG]]= define i32 @test2(i32) { entry: br label %A0 diff --git a/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir new file mode 100644 index 000000000000..7fd499b4dd25 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/irreducible-cfg.mir @@ -0,0 +1,84 @@ +# RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-fix-irreducible-control-flow %s -o - | FileCheck %s + +# This tests if we correctly create at most 2 routing blocks per entry block, +# and also whether those routing blocks are generated in the correct place. If +# one of the predecessors is the layout predecessor of an entry, a routing block +# for the entry should be generated right after the layout predecessor. + +--- | + target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" + target triple = "wasm32-unknown-unknown" + + define void @test0() { + pred0: + ret void + pred1: + ret void + entry0: + ret void + entry1: + ret void + } +... + +--- +# CHECK-LABEL: test0 +name: test0 +liveins: + - { reg: '$arguments' } +body: | + bb.0.pred0: + successors: %bb.1, %bb.2 + liveins: $arguments + %0:i32 = CONST_I32 100, implicit-def $arguments + BR_IF %bb.2, %0:i32, implicit-def $arguments + ; CHECK: bb.0.pred0: + ; CHECK: BR_IF %bb.2, %0, implicit-def $arguments + + bb.1.pred1: + ; predecessors: %bb.0 + successors: %bb.2, %bb.3 + BR_IF %bb.3, %0:i32, implicit-def $arguments + ; CHECK: bb.1.pred1: + ; CHECK: BR_IF %bb.7, %0, implicit-def $arguments + ; This falls through to bb.2, so we don't need an additional BR here + ; CHECK-NOT: BR + + ; Routing block for entry0, when predecessor is outside the loop + ; This routing block is shared between the two predecessors: pred0 and pred1. 
+ ; CHECK: bb.2: + ; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments + ; CHECK: BR %bb.6, implicit-def $arguments + + bb.2.entry0: + ; predecessors: %bb.0, %bb.1, %bb.3 + successors: %bb.3 + BR %bb.3, implicit-def $arguments + ; CHECK: bb.3.entry0: + ; CHECK: BR %bb.4, implicit-def $arguments + + ; Routing block for entry1, when predecessor is inside the loop + ; CHECK: bb.4: + ; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments + ; CHECK: BR %bb.6, implicit-def $arguments + + bb.3.entry1: + ; predecessors: %bb.1, %bb.2 + successors: %bb.2 + BR %bb.2, implicit-def $arguments + ; CHECK: bb.5.entry1: + ; CHECK: BR %bb.8, implicit-def $arguments + + ; Dispatch block + ; CHECK: bb.6: + ; CHECK: BR_TABLE_I32 %1, %bb.3, %bb.5, %bb.5, implicit-def $arguments + + ; Routing block for entry1, when predecessor is outside the loop + ; CHECK: bb.7: + ; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments + ; CHECK: BR %bb.6, implicit-def $arguments + + ; Routing block for entry0, when predecessor is inside the loop + ; CHECK: bb.8: + ; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments + ; CHECK: BR %bb.6, implicit-def $arguments