forked from OSchip/llvm-project
[WebAssembly] Make BR_TABLE non-duplicable
Summary:
After their range checks were removed in 7f50c15be5
, br_tables
started being duplicated into their predecessors by tail
folding. Unfortunately, when the br_tables were in loops this
transformation introduced bad irreducible control flow which was later
expanded into even more br_tables. This commit abuses the
`isNotDuplicable` property to prevent this irreducible control flow
from being introduced. This change saves a few dozen bytes of code
size and has a negligible affect on performance for most of the large
Emscripten benchmarks, but can improve performance significantly on
microbenchmarks of switches in loops.
Reviewers: aheejin, dschuff
Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81628
This commit is contained in:
parent
3b43f00629
commit
c5d012341e
|
@ -40,21 +40,25 @@ def brlist : Operand<i32> {
|
|||
let PrintMethod = "printBrList";
|
||||
}
|
||||
|
||||
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
|
||||
// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
|
||||
// currently.
|
||||
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
|
||||
// Duplicating a BR_TABLE is almost never a good idea. In particular, it can
|
||||
// lead to some nasty irreducibility due to tail merging when the br_table is in
|
||||
// a loop.
|
||||
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1 in {
|
||||
|
||||
defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
|
||||
(outs), (ins brlist:$brl),
|
||||
[(WebAssemblybr_table I32:$index)],
|
||||
"br_table \t$index", "br_table \t$brl",
|
||||
0x0e>;
|
||||
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
|
||||
// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
|
||||
// currently.
|
||||
defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
|
||||
(outs), (ins brlist:$brl),
|
||||
[(WebAssemblybr_table I64:$index)],
|
||||
"br_table \t$index", "br_table \t$brl",
|
||||
0x0e>;
|
||||
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
|
||||
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1
|
||||
|
||||
// This is technically a control-flow instruction, since all it affects is the
|
||||
// IP.
|
||||
|
|
|
@ -13,36 +13,21 @@ target triple = "wasm32"
|
|||
|
||||
; Just check the barest skeleton of the structure
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: i32.load
|
||||
; CHECK: i32.load
|
||||
; CHECK: i32.const
|
||||
; CHECK: i32.add $push[[DEST:.+]]=
|
||||
; CHECK: br_table $pop[[DEST]]
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: loop
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: br_table ${{[^,]+}}, 0, 1, 2, 2
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: br_table ${{[^,]+}}, 1, 2, 0
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: end_block
|
||||
; CHECK: br
|
||||
; CHECK: end_loop
|
||||
; CHECK: end_function
|
||||
; CHECK: test1.targets:
|
||||
; CHECK-NEXT: .int32
|
||||
; CHECK-NEXT: .int32
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s
|
||||
|
||||
;; Test that a small but nontrivial switch in a loop (like in a
|
||||
;; bytecode interpreter) lowers reasonably without any irreducible
|
||||
;; control flow being introduced.
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
||||
target triple = "wasm32"
|
||||
|
||||
declare void @a(i32*)
|
||||
declare void @b(i32*)
|
||||
|
||||
; CHECK-LABEL: switch_in_loop:
|
||||
; CHECK-NEXT: .functype switch_in_loop (i32, i32) -> (i32)
|
||||
; CHECK: global.get __stack_pointer
|
||||
; CHECK: global.set __stack_pointer
|
||||
; CHECK: block
|
||||
; CHECK: br_if 0
|
||||
; CHECK: .LBB0_2:
|
||||
; CHECK: loop
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: block
|
||||
; CHECK: br_table {0, 1, 2}
|
||||
; CHECK: .LBB0_3:
|
||||
; CHECK: end_block
|
||||
; CHECK: call a
|
||||
; CHECK: br 1
|
||||
; CHECK: .LBB0_4:
|
||||
; CHECK: end_block
|
||||
; CHECK: call b
|
||||
; CHECK: .LBB0_5:
|
||||
; CHECK: end_block
|
||||
; CHECK: br_if 0
|
||||
; CHECK: end_loop
|
||||
; CHECK: .LBB0_7:
|
||||
; CHECK: end_block
|
||||
; CHECK: global.set __stack_pointer
|
||||
; CHECK: end_function
|
||||
define i32 @switch_in_loop(i32* %ops, i32 %len) {
|
||||
entry:
|
||||
%res = alloca i32
|
||||
%0 = bitcast i32* %res to i8*
|
||||
store i32 0, i32* %res
|
||||
%cmp6 = icmp sgt i32 %len, 0
|
||||
br i1 %cmp6, label %for.body, label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup.loopexit: ; preds = %sw.epilog
|
||||
%.pre = load i32, i32* %res
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
|
||||
%1 = phi i32 [ %.pre, %for.cond.cleanup.loopexit ], [ 0, %entry ]
|
||||
ret i32 %1
|
||||
|
||||
for.body: ; preds = %entry, %sw.epilog
|
||||
%i.07 = phi i32 [ %inc, %sw.epilog ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %ops, i32 %i.07
|
||||
%2 = load i32, i32* %arrayidx
|
||||
switch i32 %2, label %sw.epilog [
|
||||
i32 0, label %sw.bb
|
||||
i32 1, label %sw.bb1
|
||||
]
|
||||
|
||||
sw.bb: ; preds = %for.body
|
||||
call void @a(i32* nonnull %res)
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb1: ; preds = %for.body
|
||||
call void @b(i32* nonnull %res)
|
||||
br label %sw.epilog
|
||||
|
||||
sw.epilog: ; preds = %for.body, %sw.bb1, %sw.bb
|
||||
%inc = add nuw nsw i32 %i.07, 1
|
||||
%exitcond = icmp eq i32 %inc, %len
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
|
||||
}
|
Loading…
Reference in New Issue