llvm-project/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll

; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s

; External function with no body in this file. The testN functions below call
; it from a block inside their loops. It is declared noreturn.
declare void @some_func() noreturn

; This test contains two trivial unswitch conditions in one loop.
; LoopUnswitch pass should be able to unswitch the second one
; after unswitching the first one.
;
; Both branch conditions (%cond1, %cond2) are function arguments, and the
; false edge of each branch leaves the loop for the shared %loop_exit block.
define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1(
entry:
  br label %loop_begin
; Expected output: entry and a new entry.split each end in a conditional
; branch; a further new block, entry.split.split, then enters the loop.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
;
; CHECK:       entry.split:
; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split.split, label %loop_exit
;
; CHECK:       entry.split.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. The expected output replaces this conditional branch with an
; unconditional branch to %continue.
loop_begin:
  br i1 %cond1, label %continue, label %loop_exit	; first trivial condition
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %continue

; The load is expected to stay in the loop; only the terminator is expected to
; become an unconditional branch.
continue:
  %var_val = load i32, i32* %var
  br i1 %cond2, label %do_something, label %loop_exit	; second trivial condition
; CHECK:       continue:
; CHECK-NEXT:    load
; CHECK-NEXT:    br label %do_something

; Block that branches back to the loop header.
do_something:
  call void @some_func() noreturn nounwind
  br label %loop_begin
; CHECK:       do_something:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_begin

; Shared exit block. In the expected output it only branches on to a new
; loop_exit.split block, which holds the return.
loop_exit:
  ret i32 0
; CHECK:       loop_exit:
; CHECK-NEXT:    br label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    ret
}

; Test for two trivially unswitchable switches.
;
; loop_begin switches on %cond1 with a single exiting case (0 -> loop_exit1).
; continue switches on %cond2, where only case 0 stays in the loop and both
; the default and case 42 leave for loop_exit2.
define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) {
; CHECK-LABEL: @test3(
entry:
  br label %loop_begin
; Expected output: the %cond1 switch sits in entry and the %cond2 switch in a
; new entry.split block; entry.split.split then enters the loop.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
; CHECK-NEXT:      i32 0, label %loop_exit1
; CHECK-NEXT:    ]
;
; CHECK:       entry.split:
; CHECK-NEXT:    switch i32 %cond2, label %loop_exit2 [
; CHECK-NEXT:      i32 42, label %loop_exit2
; CHECK-NEXT:      i32 0, label %entry.split.split
; CHECK-NEXT:    ]
;
; CHECK:       entry.split.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. The default destination stays in the loop; case 0 exits.
loop_begin:
  switch i32 %cond1, label %continue [
    i32 0, label %loop_exit1
  ]
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %continue

; Here the default destination is an exit; only case 0 continues the loop.
continue:
  %var_val = load i32, i32* %var
  switch i32 %cond2, label %loop_exit2 [
    i32 0, label %do_something
    i32 42, label %loop_exit2
  ]
; CHECK:       continue:
; CHECK-NEXT:    load
; CHECK-NEXT:    br label %do_something

; Block that branches back to the loop header.
do_something:
  call void @some_func() noreturn nounwind
  br label %loop_begin
; CHECK:       do_something:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_begin

; Exit reached from the %cond1 switch.
loop_exit1:
  ret i32 0
; CHECK:       loop_exit1:
; CHECK-NEXT:    ret

; Exit reached from the %cond2 switch (default and case 42).
loop_exit2:
  ret i32 0
; CHECK:       loop_exit2:
; CHECK-NEXT:    ret
;
; We shouldn't have any unreachable blocks here because the unswitched switches
; turn into branches instead.
; CHECK-NOT:     unreachable
}

; Test for a trivially unswitchable switch with multiple exiting cases and
; multiple looping cases.
;
; The single switch on %cond2 has three cases that stay in the loop (0, 1, 2 ->
; loop0, loop1, loop2, which all branch to loop_latch) and three ways out
; (13 -> loop_exit1, 42 -> loop_exit3, default -> loop_exit2).
; The %cond1 argument is not used by this function.
define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) {
; CHECK-LABEL: @test4(
entry:
  br label %loop_begin
; Expected output: entry holds a switch over all of the original case values.
; The exiting cases and the default go to their exit blocks; the looping
; cases all go to a new entry.split block that enters the loop.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 %cond2, label %loop_exit2 [
; CHECK-NEXT:      i32 13, label %loop_exit1
; CHECK-NEXT:      i32 42, label %loop_exit3
; CHECK-NEXT:      i32 0, label %entry.split
; CHECK-NEXT:      i32 1, label %entry.split
; CHECK-NEXT:      i32 2, label %entry.split
; CHECK-NEXT:    ]
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. The expected output keeps a switch here with only the three
; looping cases; its default goes to a block captured as UNREACHABLE, which
; is matched at the end of the function.
loop_begin:
  %var_val = load i32, i32* %var
  switch i32 %cond2, label %loop_exit2 [
    i32 0, label %loop0
    i32 1, label %loop1
    i32 13, label %loop_exit1
    i32 2, label %loop2
    i32 42, label %loop_exit3
  ]
; CHECK:       loop_begin:
; CHECK-NEXT:    load
; CHECK-NEXT:    switch i32 %cond2, label %[[UNREACHABLE:.*]] [
; CHECK-NEXT:      i32 0, label %loop0
; CHECK-NEXT:      i32 1, label %loop1
; CHECK-NEXT:      i32 2, label %loop2
; CHECK-NEXT:    ]

; In-loop destination of case 0.
loop0:
  call void @some_func() noreturn nounwind
  br label %loop_latch
; CHECK:       loop0:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_latch

; In-loop destination of case 1.
loop1:
  call void @some_func() noreturn nounwind
  br label %loop_latch
; CHECK:       loop1:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_latch

; In-loop destination of case 2.
loop2:
  call void @some_func() noreturn nounwind
  br label %loop_latch
; CHECK:       loop2:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_latch

; Common block that branches back to the loop header.
loop_latch:
  br label %loop_begin
; CHECK:       loop_latch:
; CHECK-NEXT:    br label %loop_begin

; Exit for case 13.
loop_exit1:
  ret i32 0
; CHECK:       loop_exit1:
; CHECK-NEXT:    ret

; Exit for the default case.
loop_exit2:
  ret i32 0
; CHECK:       loop_exit2:
; CHECK-NEXT:    ret

; Exit for case 42.
loop_exit3:
  ret i32 0
; CHECK:       loop_exit3:
; CHECK-NEXT:    ret
;
; The block used as the default of the in-loop switch is expected to contain
; only an unreachable instruction.
; CHECK:       [[UNREACHABLE]]:
; CHECK-NEXT:    unreachable
}

; This test contains a trivially unswitchable branch with an LCSSA phi node in
; a loop exit block.
;
; Each phi in loop_exit has a single incoming value, a function argument (%x or
; %y), arriving from loop_begin.
define i32 @test5(i1 %cond1, i32 %x, i32 %y) {
; CHECK-LABEL: @test5(
entry:
  br label %loop_begin
; Expected output: the %cond1 branch is in entry, going either to a new
; entry.split block (which enters the loop) or directly to loop_exit.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. The false edge exits the loop.
loop_begin:
  br i1 %cond1, label %latch, label %loop_exit
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %latch

; Block that branches back to the loop header.
latch:
  call void @some_func() noreturn nounwind
  br label %loop_begin
; CHECK:       latch:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_begin

; Exit block. In the expected output the phis keep their values but name
; %entry as the incoming block instead of %loop_begin.
loop_exit:
  %result1 = phi i32 [ %x, %loop_begin ]
  %result2 = phi i32 [ %y, %loop_begin ]
  %result = add i32 %result1, %result2
  ret i32 %result
; CHECK:       loop_exit:
; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %x, %entry ]
; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %y, %entry ]
; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
; CHECK-NEXT:    ret i32 %[[R]]
}

; This test contains a trivially unswitchable branch with a real phi node in LCSSA
; position in a shared exit block where a different path through the loop
; produces a non-invariant input to the PHI node.
;
; loop_exit is reached from loop_begin (carrying the arguments %x / %y) and
; from continue (carrying %var_val, which is loaded inside the loop).
define i32 @test6(i32* %var, i1 %cond1, i1 %cond2, i32 %x, i32 %y) {
; CHECK-LABEL: @test6(
entry:
  br label %loop_begin
; Expected output: a single conditional branch in entry, whose exit edge
; targets the new loop_exit.split block rather than loop_exit itself.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. This branch is expected to become unconditional.
loop_begin:
  br i1 %cond1, label %continue, label %loop_exit
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %continue

; The expected output keeps this conditional branch on %cond2 unchanged.
continue:
  %var_val = load i32, i32* %var
  br i1 %cond2, label %latch, label %loop_exit
; CHECK:       continue:
; CHECK-NEXT:    load
; CHECK-NEXT:    br i1 %cond2, label %latch, label %loop_exit

; Block that branches back to the loop header.
latch:
  call void @some_func() noreturn nounwind
  br label %loop_begin
; CHECK:       latch:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_begin

; Shared exit block. In the expected output its phis keep only the %continue
; inputs, and new phis in loop_exit.split merge those with %x / %y arriving
; from %entry; the add and ret are expected in loop_exit.split.
loop_exit:
  %result1 = phi i32 [ %x, %loop_begin ], [ %var_val, %continue ]
  %result2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ]
  %result = add i32 %result1, %result2
  ret i32 %result
; CHECK:       loop_exit:
; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %var_val, %continue ]
; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %var_val, %continue ]
; CHECK-NEXT:    br label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %[[R1]], %loop_exit ]
; CHECK-NEXT:    %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %[[R2]], %loop_exit ]
; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
; CHECK-NEXT:    ret i32 %[[R]]
}

; This test contains a trivially unswitchable switch with an LCSSA phi node in
; a loop exit block.
;
; Cases 0 and 1 both leave for loop_exit, so each phi there lists %loop_begin
; twice (one entry per switch edge).
define i32 @test7(i32 %cond1, i32 %x, i32 %y) {
; CHECK-LABEL: @test7(
entry:
  br label %loop_begin
; Expected output: the switch is in entry with both exiting cases; its
; default goes to a new entry.split block that enters the loop.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
; CHECK-NEXT:      i32 0, label %loop_exit
; CHECK-NEXT:      i32 1, label %loop_exit
; CHECK-NEXT:    ]
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. The default destination stays in the loop.
loop_begin:
  switch i32 %cond1, label %latch [
    i32 0, label %loop_exit
    i32 1, label %loop_exit
  ]
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %latch

; Block that branches back to the loop header.
latch:
  call void @some_func() noreturn nounwind
  br label %loop_begin
; CHECK:       latch:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_begin

; Exit block. In the expected output the phis still have two entries each,
; now naming %entry as the incoming block.
loop_exit:
  %result1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ]
  %result2 = phi i32 [ %y, %loop_begin ], [ %y, %loop_begin ]
  %result = add i32 %result1, %result2
  ret i32 %result
; CHECK:       loop_exit:
; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ]
; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ]
; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
; CHECK-NEXT:    ret i32 %[[R]]
}

; This test contains a trivially unswitchable switch with a real phi node in
; LCSSA position in a shared exit block where a different path through the loop
; produces a non-invariant input to the PHI node.
;
; The switch on %cond1 exits to loop_exit for cases 0 and 2 and to loop_exit2
; for case 1. The switch on %cond2 exits to loop_exit for case 0, on an edge
; that carries %var_val (loaded inside the loop) into the phis.
define i32 @test8(i32* %var, i32 %cond1, i32 %cond2, i32 %x, i32 %y) {
; CHECK-LABEL: @test8(
entry:
  br label %loop_begin
; Expected output: the %cond1 switch is in entry. Its cases 0 and 2 target the
; new loop_exit.split block, while case 1 targets loop_exit2 directly.
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
; CHECK-NEXT:      i32 0, label %loop_exit.split
; CHECK-NEXT:      i32 1, label %loop_exit2
; CHECK-NEXT:      i32 2, label %loop_exit.split
; CHECK-NEXT:    ]
;
; CHECK:       entry.split:
; CHECK-NEXT:    br label %loop_begin

; Loop header. The default destination stays in the loop.
loop_begin:
  switch i32 %cond1, label %continue [
    i32 0, label %loop_exit
    i32 1, label %loop_exit2
    i32 2, label %loop_exit
  ]
; CHECK:       loop_begin:
; CHECK-NEXT:    br label %continue

; The expected output keeps this switch on %cond2 unchanged.
continue:
  %var_val = load i32, i32* %var
  switch i32 %cond2, label %latch [
    i32 0, label %loop_exit
  ]
; CHECK:       continue:
; CHECK-NEXT:    load
; CHECK-NEXT:    switch i32 %cond2, label %latch [
; CHECK-NEXT:      i32 0, label %loop_exit
; CHECK-NEXT:    ]

; Block that branches back to the loop header.
latch:
  call void @some_func() noreturn nounwind
  br label %loop_begin
; CHECK:       latch:
; CHECK-NEXT:    call
; CHECK-NEXT:    br label %loop_begin

; Shared exit block. In the expected output its phis keep only the %continue
; inputs, and new phis in loop_exit.split merge those with two %entry inputs
; each (one per switch case); the add and ret are expected in loop_exit.split.
loop_exit:
  %result1.1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ], [ %var_val, %continue ]
  %result1.2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ], [ %y, %loop_begin ]
  %result1 = add i32 %result1.1, %result1.2
  ret i32 %result1
; CHECK:       loop_exit:
; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %var_val, %continue ]
; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %var_val, %continue ]
; CHECK-NEXT:    br label %loop_exit.split
;
; CHECK:       loop_exit.split:
; CHECK-NEXT:    %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ], [ %[[R1]], %loop_exit ]
; CHECK-NEXT:    %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ], [ %[[R2]], %loop_exit ]
; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
; CHECK-NEXT:    ret i32 %[[R]]

; Exit block reached only from loop_begin (case 1). In the expected output its
; phis name %entry as the incoming block.
loop_exit2:
  %result2.1 = phi i32 [ %x, %loop_begin ]
  %result2.2 = phi i32 [ %y, %loop_begin ]
  %result2 = add i32 %result2.1, %result2.2
  ret i32 %result2
; CHECK:       loop_exit2:
; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %x, %entry ]
; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %y, %entry ]
; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
; CHECK-NEXT:    ret i32 %[[R]]
}

; This test, extracted from the LLVM test suite, has an interesting dominator
; tree to update as there are edges to sibling domtree nodes within child
; domtree nodes of the unswitched node.
;
; The loop is for.cond -> land.lhs.true -> if.then20 -> if.else -> for.cond.
; It has two exits: from for.cond on the argument %cond1, and from if.then20
; on a value loaded (volatile) inside the loop. Both exit paths end in cleanup.
define void @xgets(i1 %cond1, i1* %cond2.ptr) {
; CHECK-LABEL: @xgets(
entry:
  br label %for.cond.preheader
; CHECK:       entry:
; CHECK-NEXT:    br label %for.cond.preheader

; In the input this block just enters the loop. The expected output puts the
; %cond1 branch here, going to a new for.cond.preheader.split block or
; straight to the exit path.
for.cond.preheader:
  br label %for.cond
; CHECK:       for.cond.preheader:
; CHECK-NEXT:    br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit
;
; CHECK:       for.cond.preheader.split:
; CHECK-NEXT:    br label %for.cond

; Loop header. This branch is expected to become unconditional.
for.cond:
  br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit
; CHECK:       for.cond:
; CHECK-NEXT:    br label %land.lhs.true

land.lhs.true:
  br label %if.then20
; CHECK:       land.lhs.true:
; CHECK-NEXT:    br label %if.then20

; The condition here comes from a volatile load inside the loop; the expected
; output keeps both the load and the conditional branch.
if.then20:
  %cond2 = load volatile i1, i1* %cond2.ptr
  br i1 %cond2, label %if.then23, label %if.else
; CHECK:       if.then20:
; CHECK-NEXT:    %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr
; CHECK-NEXT:    br i1 %[[COND2]], label %if.then23, label %if.else

; Block that branches back to the loop header.
if.else:
  br label %for.cond
; CHECK:       if.else:
; CHECK-NEXT:    br label %for.cond

; Exit block for the %cond1 edge.
if.end17.thread.loopexit:
  br label %if.end17.thread
; CHECK:       if.end17.thread.loopexit:
; CHECK-NEXT:    br label %if.end17.thread

if.end17.thread:
  br label %cleanup
; CHECK:       if.end17.thread:
; CHECK-NEXT:    br label %cleanup

; Exit block for the %cond2 edge.
if.then23:
  br label %cleanup
; CHECK:       if.then23:
; CHECK-NEXT:    br label %cleanup

; Common return block reached from both exit paths.
cleanup:
  ret void
; CHECK:       cleanup:
; CHECK-NEXT:    ret void
}
[PM/LoopUnswitch] Introduce a new, simpler loop unswitch pass. Currently, this pass only focuses on trivial loop unswitching. At that reduced problem it remains significantly better than the current loop unswitch: - Old pass is worse than cubic complexity. New pass is (I think) linear. - New pass is much simpler in its design by focusing on full unswitching. (See below for details on this). - New pass doesn't carry state for thresholds between pass iterations. - New pass doesn't carry state for correctness (both miscompile and infloop) between pass iterations. - New pass produces substantially better code after unswitching. - New pass can handle more trivial unswitch cases. - New pass doesn't recompute the dominator tree for the entire function and instead incrementally updates it. I've ported all of the trivial unswitching test cases from the old pass to the new one to make sure that major functionality isn't lost in the process. For several of the test cases I've worked to improve the precision and rigor of the CHECKs, but for many I've just updated them to handle the new IR produced. My initial motivation was the fact that the old pass carried state in very unreliable ways between pass iterations, and these mechanisms were incompatible with the new pass manager. However, I discovered many more improvements to make along the way. This pass makes two very significant assumptions that enable most of these improvements: 1) Focus on full unswitching -- that is, completely removing whatever control flow construct is being unswitched from the loop. In the case of trivial unswitching, this means removing the trivial (exiting) edge. In non-trivial unswitching, this means removing the branch or switch itself. This is in opposition to partial unswitching where some part of the unswitched control flow remains in the loop. Partial unswitching only really applies to switches and to folded branches. These are very similar to full unrolling and partial unrolling. 
The full form is an effective canonicalization, the partial form needs a complex cost model, cannot be iterated, isn't canonicalizing, and should be a separate pass that runs very late (much like unrolling). 2) Leverage LLVM's Loop machinery to the fullest. The original unswitch dates from a time when a great deal of LLVM's loop infrastructure was missing, ineffective, and/or unreliable. As a consequence, a lot of complexity was added which we no longer need. With these two overarching principles, I think we can build a fast and effective unswitcher that fits in well in the new PM and in the canonicalization pipeline. Some of the remaining functionality around partial unswitching may not be relevant today (not many test cases or benchmarks I can find) but if they are I'd like to add support for them as a separate layer that runs very late in the pipeline. Purely to make reviewing and introducing this code more manageable, I've split this into first a trivial-unswitch-only pass and in the next patch I'll add support for full non-trivial unswitching against a fixed threshold, exactly like full unrolling. I even plan to re-use the unrolling thresholds, as these are incredibly similar cost tradeoffs: we're cloning a loop body in order to end up with simplified control flow. We should only do that when the total growth is reasonably small. One of the biggest changes with this pass compared to the previous one is that previously, each individual trivial exiting edge from a switch was unswitched separately as a branch. Now, we unswitch the entire switch at once, with cases going to the various destinations. This lets us unswitch multiple exiting edges in a single operation and also avoids numerous extremely bad behaviors, where we would introduce 1000s of branches to test for thousands of possible values, all of which would take the exact same exit path bypassing the loop. Now we will use a switch with 1000s of cases that can be efficiently lowered into a jumptable. 
This avoids relying on somehow forming a switch out of the branches or getting horrible code if that fails for any reason. Another significant change is that this pass actively updates the CFG based on unswitching. For trivial unswitching, this is actually very easy because of the definition of loop simplified form. Doing this makes the code coming out of loop unswitch dramatically more friendly. We still should run loop-simplifycfg (at the least) after this to clean up, but it will have to do a lot less work. Finally, this pass makes much fewer attempts to simplify instructions based on the unswitch. Something like loop-instsimplify, instcombine, or GVN can be used to do increasingly powerful simplifications based on the now dominating predicate. The old simplifications are things that something like loop-instsimplify should get today or a very, very basic loop-instcombine could get. Keeping that logic separate is a big simplifying technique. Most of the code in this pass that isn't in the old one has to do with achieving specific goals: - Updating the dominator tree as we go - Unswitching all cases in a switch in a single step. I think it is still shorter than just the trivial unswitching code in the old pass despite having this functionality. Differential Revision: https://reviews.llvm.org/D32409 llvm-svn: 301576 2017-04-28 02:45:20 +08:00			`; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s \| FileCheck %s`

			`declare void @some_func() noreturn`

			`; This test contains two trivial unswitch condition in one loop.`
			`; LoopUnswitch pass should be able to unswitch the second one`
			`; after unswitching the first one.`
			`define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {`
			`; CHECK-LABEL: @test1(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: br i1 %{{.*}}, label %entry.split.split, label %loop_exit`
			`;`
			`; CHECK: entry.split.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`br i1 %cond1, label %continue, label %loop_exit ; first trivial condition`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: br label %continue`

			`continue:`
			`%var_val = load i32, i32* %var`
			`br i1 %cond2, label %do_something, label %loop_exit ; second trivial condition`
			`; CHECK: continue:`
			`; CHECK-NEXT: load`
			`; CHECK-NEXT: br label %do_something`

			`do_something:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_begin`
			`; CHECK: do_something:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit:`
			`ret i32 0`
			`; CHECK: loop_exit:`
			`; CHECK-NEXT: br label %loop_exit.split`
			`;`
			`; CHECK: loop_exit.split:`
			`; CHECK-NEXT: ret`
			`}`

			`; Test for two trivially unswitchable switches.`
			`define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) {`
			`; CHECK-LABEL: @test3(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: switch i32 %cond1, label %entry.split [`
			`; CHECK-NEXT: i32 0, label %loop_exit1`
			`; CHECK-NEXT: ]`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: switch i32 %cond2, label %loop_exit2 [`
			`; CHECK-NEXT: i32 42, label %loop_exit2`
			`; CHECK-NEXT: i32 0, label %entry.split.split`
			`; CHECK-NEXT: ]`
			`;`
			`; CHECK: entry.split.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`switch i32 %cond1, label %continue [`
			`i32 0, label %loop_exit1`
			`]`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: br label %continue`

			`continue:`
			`%var_val = load i32, i32* %var`
			`switch i32 %cond2, label %loop_exit2 [`
			`i32 0, label %do_something`
			`i32 42, label %loop_exit2`
			`]`
			`; CHECK: continue:`
			`; CHECK-NEXT: load`
			`; CHECK-NEXT: br label %do_something`

			`do_something:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_begin`
			`; CHECK: do_something:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit1:`
			`ret i32 0`
			`; CHECK: loop_exit1:`
			`; CHECK-NEXT: ret`

			`loop_exit2:`
			`ret i32 0`
			`; CHECK: loop_exit2:`
			`; CHECK-NEXT: ret`
			`;`
			`; We shouldn't have any unreachable blocks here because the unswitched switches`
			`; turn into branches instead.`
			`; CHECK-NOT: unreachable`
			`}`

			`; Test for a trivially unswitchable switch with multiple exiting cases and`
			`; multiple looping cases.`
			`define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) {`
			`; CHECK-LABEL: @test4(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: switch i32 %cond2, label %loop_exit2 [`
			`; CHECK-NEXT: i32 13, label %loop_exit1`
			`; CHECK-NEXT: i32 42, label %loop_exit3`
			`; CHECK-NEXT: i32 0, label %entry.split`
			`; CHECK-NEXT: i32 1, label %entry.split`
			`; CHECK-NEXT: i32 2, label %entry.split`
			`; CHECK-NEXT: ]`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`%var_val = load i32, i32* %var`
			`switch i32 %cond2, label %loop_exit2 [`
			`i32 0, label %loop0`
			`i32 1, label %loop1`
			`i32 13, label %loop_exit1`
			`i32 2, label %loop2`
			`i32 42, label %loop_exit3`
			`]`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: load`
			`; CHECK-NEXT: switch i32 %cond2, label %[[UNREACHABLE:.*]] [`
			`; CHECK-NEXT: i32 0, label %loop0`
			`; CHECK-NEXT: i32 1, label %loop1`
			`; CHECK-NEXT: i32 2, label %loop2`
			`; CHECK-NEXT: ]`

			`loop0:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_latch`
			`; CHECK: loop0:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_latch`

			`loop1:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_latch`
			`; CHECK: loop1:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_latch`

			`loop2:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_latch`
			`; CHECK: loop2:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_latch`

			`loop_latch:`
			`br label %loop_begin`
			`; CHECK: loop_latch:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit1:`
			`ret i32 0`
			`; CHECK: loop_exit1:`
			`; CHECK-NEXT: ret`

			`loop_exit2:`
			`ret i32 0`
			`; CHECK: loop_exit2:`
			`; CHECK-NEXT: ret`

			`loop_exit3:`
			`ret i32 0`
			`; CHECK: loop_exit3:`
			`; CHECK-NEXT: ret`
			`;`
			`; CHECK: [[UNREACHABLE]]:`
			`; CHECK-NEXT: unreachable`
			`}`
[PM/Unswitch] Teach the new simple loop unswitch to handle loop invariant PHI inputs and to rewrite PHI nodes during the actual unswitching. The checking is quite easy, but rewriting the PHI nodes is somewhat surprisingly challenging. This should handle both branches and switches. I think this is now a full featured trivial unswitcher, and more full featured than the trivial cases in the old pass while still being (IMO) somewhat simpler in how it works. Next up is to verify its correctness in more widespread testing, and then to add non-trivial unswitching. Thanks to Davide and Sanjoy for the excellent review. There is one remaining question that I may address in a follow-up patch (see the review thread for details) but it isn't related to the functionality specifically. Differential Revision: https://reviews.llvm.org/D32699 llvm-svn: 302867 2017-05-12 10:19:59 +08:00
			`; This test contains a trivially unswitchable branch with an LCSSA phi node in`
			`; a loop exit block.`
			`define i32 @test5(i1 %cond1, i32 %x, i32 %y) {`
			`; CHECK-LABEL: @test5(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`br i1 %cond1, label %latch, label %loop_exit`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: br label %latch`

			`latch:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_begin`
			`; CHECK: latch:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit:`
			`%result1 = phi i32 [ %x, %loop_begin ]`
			`%result2 = phi i32 [ %y, %loop_begin ]`
			`%result = add i32 %result1, %result2`
			`ret i32 %result`
			`; CHECK: loop_exit:`
			`; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ]`
			`; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ]`
			`; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]`
			`; CHECK-NEXT: ret i32 %[[R]]`
			`}`

			`; This test contains a trivially unswitchable branch with a real phi node in LCSSA`
			`; position in a shared exit block where a different path through the loop`
			`; produces a non-invariant input to the PHI node.`
			`define i32 @test6(i32* %var, i1 %cond1, i1 %cond2, i32 %x, i32 %y) {`
			`; CHECK-LABEL: @test6(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: br i1 %{{.*}}, label %entry.split, label %loop_exit.split`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`br i1 %cond1, label %continue, label %loop_exit`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: br label %continue`

			`continue:`
			`%var_val = load i32, i32* %var`
			`br i1 %cond2, label %latch, label %loop_exit`
			`; CHECK: continue:`
			`; CHECK-NEXT: load`
			`; CHECK-NEXT: br i1 %cond2, label %latch, label %loop_exit`

			`latch:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_begin`
			`; CHECK: latch:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit:`
			`%result1 = phi i32 [ %x, %loop_begin ], [ %var_val, %continue ]`
			`%result2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ]`
			`%result = add i32 %result1, %result2`
			`ret i32 %result`
			`; CHECK: loop_exit:`
			`; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %var_val, %continue ]`
			`; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %var_val, %continue ]`
			`; CHECK-NEXT: br label %loop_exit.split`
			`;`
			`; CHECK: loop_exit.split:`
			`; CHECK-NEXT: %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %[[R1]], %loop_exit ]`
			`; CHECK-NEXT: %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %[[R2]], %loop_exit ]`
			`; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]`
			`; CHECK-NEXT: ret i32 %[[R]]`
			`}`

			`; This test contains a trivially unswitchable switch with an LCSSA phi node in`
			`; a loop exit block.`
			`define i32 @test7(i32 %cond1, i32 %x, i32 %y) {`
			`; CHECK-LABEL: @test7(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: switch i32 %cond1, label %entry.split [`
			`; CHECK-NEXT: i32 0, label %loop_exit`
			`; CHECK-NEXT: i32 1, label %loop_exit`
			`; CHECK-NEXT: ]`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`switch i32 %cond1, label %latch [`
			`i32 0, label %loop_exit`
			`i32 1, label %loop_exit`
			`]`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: br label %latch`

			`latch:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_begin`
			`; CHECK: latch:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit:`
			`%result1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ]`
			`%result2 = phi i32 [ %y, %loop_begin ], [ %y, %loop_begin ]`
			`%result = add i32 %result1, %result2`
			`ret i32 %result`
			`; CHECK: loop_exit:`
			`; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ]`
			`; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ]`
			`; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]`
			`; CHECK-NEXT: ret i32 %[[R]]`
			`}`

			`; This test contains a trivially unswitchable switch with a real phi node in`
			`; LCSSA position in a shared exit block where a different path through the loop`
			`; produces a non-invariant input to the PHI node.`
			`define i32 @test8(i32* %var, i32 %cond1, i32 %cond2, i32 %x, i32 %y) {`
			`; CHECK-LABEL: @test8(`
			`entry:`
			`br label %loop_begin`
			`; CHECK-NEXT: entry:`
			`; CHECK-NEXT: switch i32 %cond1, label %entry.split [`
			`; CHECK-NEXT: i32 0, label %loop_exit.split`
			`; CHECK-NEXT: i32 1, label %loop_exit2`
			`; CHECK-NEXT: i32 2, label %loop_exit.split`
			`; CHECK-NEXT: ]`
			`;`
			`; CHECK: entry.split:`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_begin:`
			`switch i32 %cond1, label %continue [`
			`i32 0, label %loop_exit`
			`i32 1, label %loop_exit2`
			`i32 2, label %loop_exit`
			`]`
			`; CHECK: loop_begin:`
			`; CHECK-NEXT: br label %continue`

			`continue:`
			`%var_val = load i32, i32* %var`
			`switch i32 %cond2, label %latch [`
			`i32 0, label %loop_exit`
			`]`
			`; CHECK: continue:`
			`; CHECK-NEXT: load`
			`; CHECK-NEXT: switch i32 %cond2, label %latch [`
			`; CHECK-NEXT: i32 0, label %loop_exit`
			`; CHECK-NEXT: ]`

			`latch:`
			`call void @some_func() noreturn nounwind`
			`br label %loop_begin`
			`; CHECK: latch:`
			`; CHECK-NEXT: call`
			`; CHECK-NEXT: br label %loop_begin`

			`loop_exit:`
			`%result1.1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ], [ %var_val, %continue ]`
			`%result1.2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ], [ %y, %loop_begin ]`
			`%result1 = add i32 %result1.1, %result1.2`
			`ret i32 %result1`
			`; CHECK: loop_exit:`
			`; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %var_val, %continue ]`
			`; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %var_val, %continue ]`
			`; CHECK-NEXT: br label %loop_exit.split`
			`;`
			`; CHECK: loop_exit.split:`
			`; CHECK-NEXT: %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ], [ %[[R1]], %loop_exit ]`
			`; CHECK-NEXT: %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ], [ %[[R2]], %loop_exit ]`
			`; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]`
			`; CHECK-NEXT: ret i32 %[[R]]`

			`loop_exit2:`
			`%result2.1 = phi i32 [ %x, %loop_begin ]`
			`%result2.2 = phi i32 [ %y, %loop_begin ]`
			`%result2 = add i32 %result2.1, %result2.2`
			`ret i32 %result2`
			`; CHECK: loop_exit2:`
			`; CHECK-NEXT: %[[R1:.*]] = phi i32 [ %x, %entry ]`
			`; CHECK-NEXT: %[[R2:.*]] = phi i32 [ %y, %entry ]`
			`; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]]`
			`; CHECK-NEXT: ret i32 %[[R]]`
			`}`
[PM/Unswitch] Fix a bug in the domtree update logic for the new unswitch pass. The original logic only considered direct successors of the hoisted domtree nodes, but that isn't really enough. If there are other basic blocks that are completely within the subtree, their successors could just as easily be impacted by the hoisting. The more I think about it, the more I think the correct update here is to hoist every block on the dominance frontier which has an idom in the chain we hoist across. However, this is subtle enough that I'd definitely appreciate some more eyes on it. Sadly, if this is the correct algorithm, it requires computing a (highly localized) dominance frontier. I've done this in the simplest (IE, least code) way I could come up with, but that may be too naive. Suggestions welcome here, dominance update algorithms are not an area I've studied much, so I don't have strong opinions. In good news, with this patch, turning on simple unswitch passes the LLVM test suite for me with asserts enabled. Differential Revision: https://reviews.llvm.org/D32740 llvm-svn: 303843 2017-05-25 14:33:36 +08:00
			`; This test, extracted from the LLVM test suite, has an interesting dominator`
			`; tree to update as there are edges to sibling domtree nodes within child`
			`; domtree nodes of the unswitched node.`
			`define void @xgets(i1 %cond1, i1* %cond2.ptr) {`
			`; CHECK-LABEL: @xgets(`
			`entry:`
			`br label %for.cond.preheader`
			`; CHECK: entry:`
			`; CHECK-NEXT: br label %for.cond.preheader`

			`for.cond.preheader:`
			`br label %for.cond`
			`; CHECK: for.cond.preheader:`
			`; CHECK-NEXT: br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit`
			`;`
			`; CHECK: for.cond.preheader.split:`
			`; CHECK-NEXT: br label %for.cond`

			`for.cond:`
			`br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit`
			`; CHECK: for.cond:`
			`; CHECK-NEXT: br label %land.lhs.true`

			`land.lhs.true:`
			`br label %if.then20`
			`; CHECK: land.lhs.true:`
			`; CHECK-NEXT: br label %if.then20`

			`if.then20:`
			`%cond2 = load volatile i1, i1* %cond2.ptr`
			`br i1 %cond2, label %if.then23, label %if.else`
			`; CHECK: if.then20:`
			`; CHECK-NEXT: %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr`
			`; CHECK-NEXT: br i1 %[[COND2]], label %if.then23, label %if.else`

			`if.else:`
			`br label %for.cond`
			`; CHECK: if.else:`
			`; CHECK-NEXT: br label %for.cond`

			`if.end17.thread.loopexit:`
			`br label %if.end17.thread`
			`; CHECK: if.end17.thread.loopexit:`
			`; CHECK-NEXT: br label %if.end17.thread`

			`if.end17.thread:`
			`br label %cleanup`
			`; CHECK: if.end17.thread:`
			`; CHECK-NEXT: br label %cleanup`

			`if.then23:`
			`br label %cleanup`
			`; CHECK: if.then23:`
			`; CHECK-NEXT: br label %cleanup`

			`cleanup:`
			`ret void`
			`; CHECK: cleanup:`
			`; CHECK-NEXT: ret void`
			`}`