[WebAssembly] Irreducible control flow rewrite
Summary:
Rewrite WebAssemblyFixIrreducibleControlFlow to a simpler and cleaner
design, which directly computes reachability and other properties
itself. This avoids previous complexity and bugs. (The new graph
analyses are very similar to how the Relooper algorithm would find loop
entries and so forth.)
This fixes a few bugs, including where we had a false positive and
thought fannkuch was irreducible when it was not, which made us much
larger and slower there, and a reverse bug where we missed
irreducibility. On fannkuch, we used to be 44% slower than asm2wasm and
are now 4% faster.
Reviewers: aheejin
Subscribers: jdoerfert, mgrang, dschuff, sbc100, jgravelle-google, sunfish, llvm-commits
Differential Revision: https://reviews.llvm.org/D58919
Patch by Alon Zakai (kripken)
llvm-svn: 356313
2019-03-16 11:00:19 +08:00
|
|
|
; RUN: llc < %s -O0 -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
|
2016-03-09 10:01:14 +08:00
|
|
|
|
|
|
|
; Test irreducible CFG handling.
|
|
|
|
|
|
|
|
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
2018-05-11 01:49:11 +08:00
|
|
|
target triple = "wasm32-unknown-unknown"
|
2016-03-09 10:01:14 +08:00
|
|
|
|
|
|
|
; A simple loop with two entries.
|
|
|
|
|
|
|
|
; CHECK-LABEL: test0:
|
2016-04-26 09:40:56 +08:00
|
|
|
; CHECK: f64.load
|
[WebAssembly] Optimize Irreducible Control Flow
Summary:
Irreducible control flow is not that rare, e.g. it happens in malloc and
3 other places in the libc portions linked in to a hello world program.
This patch improves how we handle that code: it emits a br_table to
dispatch to only the minimal necessary number of blocks. This reduces
the size of malloc by 33%, and makes it comparable in size to asm2wasm's
malloc output.
Added some tests, and verified this passes the emscripten-wasm tests run
on the waterfall (binaryen2, wasmobj2, other).
Reviewers: aheejin, sunfish
Subscribers: mgrang, jgravelle-google, sbc100, dschuff, llvm-commits
Differential Revision: https://reviews.llvm.org/D55467
Patch by Alon Zakai (kripken)
llvm-svn: 350367
2019-01-04 07:10:11 +08:00
|
|
|
; CHECK: i32.const $[[REG:[^,]+]]=
|
2016-04-26 09:40:56 +08:00
|
|
|
; CHECK: br_table $[[REG]],
|
2016-03-09 10:01:14 +08:00
|
|
|
define void @test0(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
; CFG shape: the cycle %bb6 -> %bb9 -> %bb13 -> %bb6 can be entered at two
; different blocks: at %bb6 (directly from %bb) and at %bb13 (from %bb via
; %bb3).
bb:
  %tmp = icmp eq i32 %arg2, 0
  br i1 %tmp, label %bb6, label %bb3

bb3:                                              ; preds = %bb
  ; Second entry path: loads a value and jumps into the cycle at %bb13.
  %tmp4 = getelementptr double, double* %arg, i32 %arg3
  %tmp5 = load double, double* %tmp4, align 4
  br label %bb13

bb6:                                              ; preds = %bb13, %bb
  ; First entry: the counter starts at 0 when arriving from %bb.
  %tmp7 = phi i32 [ %tmp18, %bb13 ], [ 0, %bb ]
  %tmp8 = icmp slt i32 %tmp7, %arg1
  br i1 %tmp8, label %bb9, label %bb19

bb9:                                              ; preds = %bb6
  %tmp10 = getelementptr double, double* %arg, i32 %tmp7
  %tmp11 = load double, double* %tmp10, align 4
  %tmp12 = fmul double %tmp11, 2.300000e+00
  store double %tmp12, double* %tmp10, align 4
  br label %bb13

bb13:                                             ; preds = %bb9, %bb3
  ; The index is undef when this block is reached through %bb3.
  %tmp14 = phi double [ %tmp5, %bb3 ], [ %tmp12, %bb9 ]
  %tmp15 = phi i32 [ undef, %bb3 ], [ %tmp7, %bb9 ]
  %tmp16 = getelementptr double, double* %arg, i32 %tmp15
  %tmp17 = fadd double %tmp14, 1.300000e+00
  store double %tmp17, double* %tmp16, align 4
  %tmp18 = add nsw i32 %tmp15, 1
  br label %bb6

bb19:                                             ; preds = %bb6
  ret void
}
|
|
|
|
|
|
|
|
; A simple loop with two entries and an inner natural loop.
|
|
|
|
|
|
|
|
; CHECK-LABEL: test1:
|
2016-04-26 09:40:56 +08:00
|
|
|
; CHECK: f64.load
|
[WebAssembly] Optimize Irreducible Control Flow
Summary:
Irreducible control flow is not that rare, e.g. it happens in malloc and
3 other places in the libc portions linked in to a hello world program.
This patch improves how we handle that code: it emits a br_table to
dispatch to only the minimal necessary number of blocks. This reduces
the size of malloc by 33%, and makes it comparable in size to asm2wasm's
malloc output.
Added some tests, and verified this passes the emscripten-wasm tests run
on the waterfall (binaryen2, wasmobj2, other).
Reviewers: aheejin, sunfish
Subscribers: mgrang, jgravelle-google, sbc100, dschuff, llvm-commits
Differential Revision: https://reviews.llvm.org/D55467
Patch by Alon Zakai (kripken)
llvm-svn: 350367
2019-01-04 07:10:11 +08:00
|
|
|
; CHECK: i32.const $[[REG:[^,]+]]=
|
2016-04-26 09:40:56 +08:00
|
|
|
; CHECK: br_table $[[REG]],
|
2016-03-09 10:01:14 +08:00
|
|
|
define void @test1(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
; CFG shape: the cycle %bb6 -> %bb9 -> %bb10 -> %bb13 -> %bb6 can be entered
; at %bb6 (directly from %bb) and at %bb13 (from %bb via %bb3). %bb10 is a
; single-entry self-loop nested inside that cycle.
bb:
  %tmp = icmp eq i32 %arg2, 0
  br i1 %tmp, label %bb6, label %bb3

bb3:                                              ; preds = %bb
  ; Second entry path: loads a value and jumps into the cycle at %bb13.
  %tmp4 = getelementptr double, double* %arg, i32 %arg3
  %tmp5 = load double, double* %tmp4, align 4
  br label %bb13

bb6:                                              ; preds = %bb13, %bb
  ; First entry: the counter starts at 0 when arriving from %bb.
  %tmp7 = phi i32 [ %tmp18, %bb13 ], [ 0, %bb ]
  %tmp8 = icmp slt i32 %tmp7, %arg1
  br i1 %tmp8, label %bb9, label %bb19

bb9:                                              ; preds = %bb6
  %tmp10 = getelementptr double, double* %arg, i32 %tmp7
  %tmp11 = load double, double* %tmp10, align 4
  %tmp12 = fmul double %tmp11, 2.300000e+00
  store double %tmp12, double* %tmp10, align 4
  br label %bb10

bb10:                                             ; preds = %bb10, %bb9
  ; Inner natural loop: counts %p up from 0 and exits once %pn reaches 256.
  %p = phi i32 [ 0, %bb9 ], [ %pn, %bb10 ]
  %pn = add i32 %p, 1
  %c = icmp slt i32 %pn, 256
  br i1 %c, label %bb10, label %bb13

bb13:                                             ; preds = %bb10, %bb3
  ; The index is undef when this block is reached through %bb3.
  %tmp14 = phi double [ %tmp5, %bb3 ], [ %tmp12, %bb10 ]
  %tmp15 = phi i32 [ undef, %bb3 ], [ %tmp7, %bb10 ]
  %tmp16 = getelementptr double, double* %arg, i32 %tmp15
  %tmp17 = fadd double %tmp14, 1.300000e+00
  store double %tmp17, double* %tmp16, align 4
  %tmp18 = add nsw i32 %tmp15, 1
  br label %bb6

bb19:                                             ; preds = %bb6
  ret void
}
|
[WebAssembly] Optimize Irreducible Control Flow
Summary:
Irreducible control flow is not that rare, e.g. it happens in malloc and
3 other places in the libc portions linked in to a hello world program.
This patch improves how we handle that code: it emits a br_table to
dispatch to only the minimal necessary number of blocks. This reduces
the size of malloc by 33%, and makes it comparable in size to asm2wasm's
malloc output.
Added some tests, and verified this passes the emscripten-wasm tests run
on the waterfall (binaryen2, wasmobj2, other).
Reviewers: aheejin, sunfish
Subscribers: mgrang, jgravelle-google, sbc100, dschuff, llvm-commits
Differential Revision: https://reviews.llvm.org/D55467
Patch by Alon Zakai (kripken)
llvm-svn: 350367
2019-01-04 07:10:11 +08:00
|
|
|
|
2019-03-30 09:31:11 +08:00
|
|
|
; A simple loop with 2 blocks that are both entries: A1 and A2.
|
|
|
|
; Even though A1 and A2 both have 3 predecessors (A0, A1, and A2), not 6 but
|
|
|
|
; only 4 new routing blocks to the dispatch block should be generated.
|
[WebAssembly] Optimize Irreducible Control Flow
Summary:
Irreducible control flow is not that rare, e.g. it happens in malloc and
3 other places in the libc portions linked in to a hello world program.
This patch improves how we handle that code: it emits a br_table to
dispatch to only the minimal necessary number of blocks. This reduces
the size of malloc by 33%, and makes it comparable in size to asm2wasm's
malloc output.
Added some tests, and verified this passes the emscripten-wasm tests run
on the waterfall (binaryen2, wasmobj2, other).
Reviewers: aheejin, sunfish
Subscribers: mgrang, jgravelle-google, sbc100, dschuff, llvm-commits
Differential Revision: https://reviews.llvm.org/D55467
Patch by Alon Zakai (kripken)
llvm-svn: 350367
2019-01-04 07:10:11 +08:00
|
|
|
|
|
|
|
; CHECK-LABEL: test2:
|
|
|
|
; CHECK: br_if
|
|
|
|
; CHECK: i32.const $[[REG:[^,]+]]=
|
2019-03-30 09:31:11 +08:00
|
|
|
; CHECK: i32.const $[[REG]]=
|
[WebAssembly] Optimize Irreducible Control Flow
Summary:
Irreducible control flow is not that rare, e.g. it happens in malloc and
3 other places in the libc portions linked in to a hello world program.
This patch improves how we handle that code: it emits a br_table to
dispatch to only the minimal necessary number of blocks. This reduces
the size of malloc by 33%, and makes it comparable in size to asm2wasm's
malloc output.
Added some tests, and verified this passes the emscripten-wasm tests run
on the waterfall (binaryen2, wasmobj2, other).
Reviewers: aheejin, sunfish
Subscribers: mgrang, jgravelle-google, sbc100, dschuff, llvm-commits
Differential Revision: https://reviews.llvm.org/D55467
Patch by Alon Zakai (kripken)
llvm-svn: 350367
2019-01-04 07:10:11 +08:00
|
|
|
; CHECK: br_table $[[REG]],
|
2019-03-30 09:31:11 +08:00
|
|
|
; CHECK: i32.const $[[REG]]=
|
|
|
|
; CHECK: i32.const $[[REG]]=
|
|
|
|
; CHECK-NOT: i32.const $[[REG]]=
|
2019-03-19 13:10:39 +08:00
|
|
|
define i32 @test2(i32) {
; CFG shape: %A0 branches to either %A1 or %A2, and each of %A1 and %A2
; branches to either itself or the other, so both blocks are entries of the
; same cycle. No block in this function returns.
entry:
  br label %A0

A0:                                               ; preds = %entry
  %a0a = tail call i32 @test2(i32 1)
  %a0b = icmp eq i32 %a0a, 0
  br i1 %a0b, label %A1, label %A2

A1:                                               ; preds = %A2, %A1, %A0
  %a1a = tail call i32 @test2(i32 2)
  %a1b = icmp eq i32 %a1a, 0
  br i1 %a1b, label %A1, label %A2

A2:                                               ; preds = %A2, %A1, %A0
  %a2a = tail call i32 @test2(i32 3)
  %a2b = icmp eq i32 %a2a, 0
  br i1 %a2b, label %A1, label %A2
}
|
|
|
|
|
|
|
|
; An interesting loop with inner loop and if-else structure too.
|
|
|
|
|
|
|
|
; CHECK-LABEL: test3:
|
|
|
|
; CHECK: br_if
|
|
|
|
define void @test3(i32 %ws) {
; CFG shape: the cycle %for.cond -> %if.end5 -> %if.end8 -> %wynn -> %for.cond
; is entered at %for.cond (from %if.then2) and at %wynn (from %if.then).
; Inside it, %if.then7 -> %for.cond is a second back edge and %if.then4 forms
; an if-without-else diamond. Every condition is a volatile load of %ws.addr.
entry:
  %ws.addr = alloca i32, align 4
  store volatile i32 %ws, i32* %ws.addr, align 4
  %0 = load volatile i32, i32* %ws.addr, align 4
  %tobool = icmp ne i32 %0, 0
  br i1 %tobool, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; Enters the cycle at %wynn rather than at %for.cond.
  br label %wynn

if.end:                                           ; preds = %entry
  %1 = load volatile i32, i32* %ws.addr, align 4
  %tobool1 = icmp ne i32 %1, 0
  br i1 %tobool1, label %if.end9, label %if.then2

if.then2:                                         ; preds = %if.end
  ; Enters the cycle at %for.cond.
  br label %for.cond

for.cond:                                         ; preds = %wynn, %if.then7, %if.then2
  %2 = load volatile i32, i32* %ws.addr, align 4
  %tobool3 = icmp ne i32 %2, 0
  br i1 %tobool3, label %if.then4, label %if.end5

if.then4:                                         ; preds = %for.cond
  br label %if.end5

if.end5:                                          ; preds = %if.then4, %for.cond
  %3 = load volatile i32, i32* %ws.addr, align 4
  %tobool6 = icmp ne i32 %3, 0
  br i1 %tobool6, label %if.then7, label %if.end8

if.then7:                                         ; preds = %if.end5
  br label %for.cond

if.end8:                                          ; preds = %if.end5
  br label %wynn

wynn:                                             ; preds = %if.end8, %if.then
  br label %for.cond

if.end9:                                          ; preds = %if.end
  ret void
}
|
|
|
|
|
|
|
|
; Multi-level irreducibility, after reducing in the main scope we must then
|
|
|
|
; reduce in the inner loop that we just created.
|
|
|
|
; CHECK: br_table
|
|
|
|
; CHECK: br_table
|
|
|
|
define void @pi_next() {
; CFG shape: three nested cycles headed by %for.cond57.i, %for.cond65.i and
; %for.cond76.i. The innermost cycle %for.cond76.i <-> %for.inc197.i is also
; entered at %for.inc197.i from %if.then.i49, which lies outside all three,
; while the regular path from %if.else.i52 enters at %for.cond57.i.
entry:
  br i1 undef, label %sw.bb5, label %return

sw.bb5:                                           ; preds = %entry
  br i1 undef, label %if.then.i49, label %if.else.i52

if.then.i49:                                      ; preds = %sw.bb5
  ; Jumps straight into the innermost cycle, bypassing the outer headers.
  br label %for.inc197.i

if.else.i52:                                      ; preds = %sw.bb5
  br label %for.cond57.i

for.cond57.i:                                     ; preds = %for.inc205.i, %if.else.i52
  store i32 0, i32* undef, align 4
  br label %for.cond65.i

for.cond65.i:                                     ; preds = %for.inc201.i, %for.cond57.i
  br i1 undef, label %for.body70.i, label %for.inc205.i

for.body70.i:                                     ; preds = %for.cond65.i
  br label %for.cond76.i

for.cond76.i:                                     ; preds = %for.inc197.i, %for.body70.i
  %0 = phi i32 [ %inc199.i, %for.inc197.i ], [ 0, %for.body70.i ]
  %cmp81.i = icmp slt i32 %0, 0
  br i1 %cmp81.i, label %for.body82.i, label %for.inc201.i

for.body82.i:                                     ; preds = %for.cond76.i
  br label %for.inc197.i

for.inc197.i:                                     ; preds = %for.body82.i, %if.then.i49
  %inc199.i = add nsw i32 undef, 1
  br label %for.cond76.i

for.inc201.i:                                     ; preds = %for.cond76.i
  br label %for.cond65.i

for.inc205.i:                                     ; preds = %for.cond65.i
  br label %for.cond57.i

return:                                           ; preds = %entry
  ret void
}
|
|
|
|
|
[WebAssembly] Irreducible control flow rewrite
Summary:
Rewrite WebAssemblyFixIrreducibleControlFlow to a simpler and cleaner
design, which directly computes reachability and other properties
itself. This avoids previous complexity and bugs. (The new graph
analyses are very similar to how the Relooper algorithm would find loop
entries and so forth.)
This fixes a few bugs, including where we had a false positive and
thought fannkuch was irreducible when it was not, which made us much
larger and slower there, and a reverse bug where we missed
irreducibility. On fannkuch, we used to be 44% slower than asm2wasm and
are now 4% faster.
Reviewers: aheejin
Subscribers: jdoerfert, mgrang, dschuff, sbc100, jgravelle-google, sunfish, llvm-commits
Differential Revision: https://reviews.llvm.org/D58919
Patch by Alon Zakai (kripken)
llvm-svn: 356313
2019-03-16 11:00:19 +08:00
|
|
|
; A more complex case of irreducible control flow, two interacting loops.
|
|
|
|
; CHECK: ps_hints_apply
|
|
|
|
; CHECK: br_table
|
|
|
|
define void @ps_hints_apply() {
; CFG shape: two cycles interact here.
;   * %do.body <-> %do.cond is a single-entry loop reached from %for.body.
;   * %for.cond -> %for.body39 -> %Skip -> %for.cond is entered at %for.cond
;     (from %psh) and at %Skip (from %do.body, i.e. out of the first loop).
entry:
  br label %psh

psh:                                              ; preds = %entry
  br i1 undef, label %for.cond, label %for.body

for.body:                                         ; preds = %psh
  br label %do.body

do.body:                                          ; preds = %do.cond, %for.body
  %cmp118 = icmp eq i32* undef, undef
  ; Leaving this loop lands in the middle of the other cycle, at %Skip.
  br i1 %cmp118, label %Skip, label %do.cond

do.cond:                                          ; preds = %do.body
  br label %do.body

for.cond:                                         ; preds = %Skip, %psh
  br label %for.body39

for.body39:                                       ; preds = %for.cond
  br i1 undef, label %Skip, label %do.body45

do.body45:                                        ; preds = %for.body39
  unreachable

Skip:                                             ; preds = %for.body39, %do.body
  br label %for.cond
}
|
|
|
|
|
|
|
|
; A simple sequence of loops with blocks in between, that should not be
|
|
|
|
; misinterpreted as irreducible control flow.
|
|
|
|
; CHECK: fannkuch_worker
|
|
|
|
; CHECK-NOT: br_table
|
|
|
|
define i32 @fannkuch_worker(i8* %_arg) {
; CFG shape: two loop nests in sequence, each entered only through its own
; header.
;   * %do.body ... %do.cond, containing the self-loop %for.cond1.
;   * %for.cond2 ... %for.end6, containing the self-loop %for.cond3.
; %_arg is unused, and %return has no predecessors.
for.cond:
  br label %do.body

do.body:                                          ; preds = %do.cond, %for.cond
  br label %for.cond1

for.cond1:                                        ; preds = %for.cond1, %do.body
  br i1 true, label %for.cond1, label %for.end

for.end:                                          ; preds = %for.cond1
  br label %do.cond

do.cond:                                          ; preds = %for.end
  br i1 true, label %do.body, label %do.end

do.end:                                           ; preds = %do.cond
  br label %for.cond2

for.cond2:                                        ; preds = %for.end6, %do.end
  br label %for.cond3

for.cond3:                                        ; preds = %for.cond3, %for.cond2
  br i1 true, label %for.cond3, label %for.end6

for.end6:                                         ; preds = %for.cond3
  br label %for.cond2

return:                                           ; No predecessors!
  ret i32 1
}
|
|
|
|
|
|
|
|
; Test an interesting pattern of nested irreducibility.
|
|
|
|
|
|
|
|
; CHECK: func_2:
|
|
|
|
; CHECK: br_table
|
|
|
|
define void @func_2() {
; CFG shape: the outer cycle
;   %lbl_937 -> %if.end965 -> %for.cond967 -> %for.cond975 -> %for.body978
;   -> %lbl_937
; is entered at %lbl_937 (from %entry) and at %if.end965 (from %if.else787).
; Nested inside it are the cycles headed by %for.cond967 (back edge from
; %for.end1035) and %for.cond975 (back edge from %if.end984). No block in
; this function returns.
entry:
  br i1 undef, label %lbl_937, label %if.else787

lbl_937:                                          ; preds = %for.body978, %entry
  br label %if.end965

if.else787:                                       ; preds = %entry
  ; Enters the outer cycle at %if.end965 instead of %lbl_937.
  br label %if.end965

if.end965:                                        ; preds = %if.else787, %lbl_937
  br label %for.cond967

for.cond967:                                      ; preds = %for.end1035, %if.end965
  br label %for.cond975

for.cond975:                                      ; preds = %if.end984, %for.cond967
  br i1 undef, label %for.body978, label %for.end1035

for.body978:                                      ; preds = %for.cond975
  ; Branching to %lbl_937 leaves both inner cycles and restarts the outer one.
  br i1 undef, label %lbl_937, label %if.end984

if.end984:                                        ; preds = %for.body978
  br label %for.cond975

for.end1035:                                      ; preds = %for.cond975
  br label %for.cond967
}
|