diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index c768f35eda5a..9752f521bb24 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -109,6 +110,10 @@ static cl::opt cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden); +static cl::opt FreezeLoopUnswitchCond( + "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden, + cl::desc("If enabled, the freeze instruction will be added to condition " + "of loop unswitch to prevent miscompilation.")); /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. @@ -196,15 +201,15 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, /// Copy a set of loop invariant values \p ToDuplicate and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. -static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, - ArrayRef Invariants, - bool Direction, - BasicBlock &UnswitchedSucc, - BasicBlock &NormalSucc) { +static void buildPartialUnswitchConditionalBranch( + BasicBlock &BB, ArrayRef Invariants, bool Direction, + BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) { IRBuilder<> IRB(&BB); Value *Cond = Direction ? IRB.CreateOr(Invariants) : IRB.CreateAnd(Invariants); + if (InsertFreeze) + Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr"); IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? 
&NormalSucc : &UnswitchedSucc); } @@ -565,7 +570,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" " condition!"); buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection, - *UnswitchedBB, *NewPH); + *UnswitchedBB, *NewPH, false); } // Update the dominator tree with the added edge. @@ -2124,6 +2129,13 @@ static void unswitchNontrivialInvariants( SE->forgetTopmostLoop(&L); } + bool InsertFreeze = false; + if (FreezeLoopUnswitchCond) { + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L); + } + // If the edge from this terminator to a successor dominates that successor, // store a map from each block in its dominator subtree to it. This lets us // tell when cloning for a particular successor if a block is dominated by @@ -2198,6 +2210,11 @@ static void unswitchNontrivialInvariants( BasicBlock *ClonedPH = ClonedPHs.begin()->second; BI->setSuccessor(ClonedSucc, ClonedPH); BI->setSuccessor(1 - ClonedSucc, LoopPH); + if (InsertFreeze) { + auto Cond = BI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT)) + BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI)); + } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); } else { assert(SI && "Must either be a branch or switch!"); @@ -2212,6 +2229,11 @@ static void unswitchNontrivialInvariants( else Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); + if (InsertFreeze) { + auto Cond = SI->getCondition(); + if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT)) + SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI)); + } // We need to use the set to populate domtree updates as even when there // are multiple cases pointing at the same successor we only want to // remove and insert one edge in the domtree. 
@@ -2292,7 +2314,7 @@ static void unswitchNontrivialInvariants( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); else buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, - *ClonedPH, *LoopPH); + *ClonedPH, *LoopPH, InsertFreeze); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); if (MSSAU) { diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll new file mode 100644 index 000000000000..f8b5661f8b45 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-freeze.ll @@ -0,0 +1,2330 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -freeze-loop-unswitch-cond -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -freeze-loop-unswitch-cond -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s + +declare i32 @a() +declare i32 @b() +declare i32 @c() +declare i32 @d() + +declare void @sink1(i32) +declare void @sink2(i32) +declare void @sink3(i1) +declare void @sink4(i1) + +declare i1 @cond() +declare i32 @cond.i32() + +declare i32 @__CxxFrameHandler3(...) 
+ +define i32 @test1_freeze(i1* %ptr0, i1* %ptr1, i1* %ptr2) { +; CHECK-LABEL: @test1_freeze( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND1:%.*]] = load i1, i1* [[PTR1:%.*]], align 1 +; CHECK-NEXT: [[COND2:%.*]] = load i1, i1* [[PTR2:%.*]], align 1 +; CHECK-NEXT: br i1 [[COND1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US:%.*]] +; CHECK: loop_begin.us: +; CHECK-NEXT: br label [[LOOP_A_US:%.*]] +; CHECK: loop_a.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a() +; CHECK-NEXT: br label [[LATCH_US:%.*]] +; CHECK: latch.us: +; CHECK-NEXT: [[V_US:%.*]] = load i1, i1* [[PTR0:%.*]], align 1 +; CHECK-NEXT: br i1 [[V_US]], label [[LOOP_BEGIN_US]], label [[LOOP_EXIT_SPLIT_US:%.*]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i1 [[COND2]] +; CHECK-NEXT: br i1 [[COND2_FR]], label [[ENTRY_SPLIT_SPLIT_US:%.*]], label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US1:%.*]] +; CHECK: loop_begin.us1: +; CHECK-NEXT: br label [[LOOP_B_US:%.*]] +; CHECK: loop_b.us: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b() +; CHECK-NEXT: br label [[LOOP_B_A_US:%.*]] +; CHECK: loop_b_a.us: +; CHECK-NEXT: call void @sink3(i1 true) +; CHECK-NEXT: br label [[LATCH_US2:%.*]] +; CHECK: latch.us2: +; CHECK-NEXT: [[V_US3:%.*]] = load i1, i1* [[PTR0]], align 1 +; CHECK-NEXT: br i1 [[V_US3]], label [[LOOP_BEGIN_US1]], label [[LOOP_EXIT_SPLIT_SPLIT_US:%.*]] +; CHECK: loop_exit.split.split.us: +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: br label [[LOOP_B:%.*]] +; CHECK: loop_b: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @b() +; CHECK-NEXT: br label [[LOOP_B_B:%.*]] +; CHECK: loop_b_b: +; CHECK-NEXT: call void @sink4(i1 false) +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; 
CHECK-NEXT: [[V:%.*]] = load i1, i1* [[PTR0]], align 1 +; CHECK-NEXT: br i1 [[V]], label [[LOOP_BEGIN]], label [[LOOP_EXIT_SPLIT_SPLIT:%.*]] +; CHECK: loop_exit.split.split: +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT]] +; CHECK: loop_exit.split: +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %cond1 = load i1, i1* %ptr1 + %cond2 = load i1, i1* %ptr2 + br label %loop_begin + +loop_begin: + br i1 %cond1, label %loop_a, label %loop_b + +loop_a: + call i32 @a() + br label %latch +; The 'loop_a' unswitched loop. + +loop_b: + call i32 @b() + br i1 %cond2, label %loop_b_a, label %loop_b_b +; The second unswitched condition. + +loop_b_a: + call void @sink3(i1 %cond2) + br label %latch +; The 'loop_b_a' unswitched loop. +; %cond2 is replaced with true + +loop_b_b: + call void @sink4(i1 %cond2) + br label %latch +; The 'loop_b_b' unswitched loop. +; %cond2 is replaced with false + +latch: + %v = load i1, i1* %ptr0 + br i1 %v, label %loop_begin, label %loop_exit + +loop_exit: + ret i32 0 +} + +; Test that when unswitching a deeply nested loop condition in a way that +; produces a non-loop clone that can reach multiple exit blocks which are part +; of different outer loops we correctly divide the cloned loop blocks between +; the outer loops based on reachability. 
+define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7a( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[A]], [[LOOP_BEGIN]] ], [ [[A2:%.*]], [[INNER_INNER_LOOP_EXIT:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_LOOP_BEGIN_SPLIT_US:%.*]], label [[INNER_LOOP_BEGIN_SPLIT:%.*]] +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[INNER_INNER_LOOP_A_US:%.*]], label [[INNER_INNER_LOOP_B_US:%.*]] +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: [[V3_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3_US]], label [[INNER_INNER_LOOP_EXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_C_US_LOOPEXIT:%.*]] +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: [[A_PHI_LCSSA10:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_BEGIN_US]] ] +; CHECK-NEXT: [[B_LCSSA6:%.*]] = phi i32 [ [[B]], [[INNER_INNER_LOOP_BEGIN_US]] ] +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[LOOP_EXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_C_US:%.*]] +; CHECK: inner_inner_loop_c.us.loopexit: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_C_US]] +; CHECK: inner_inner_loop_c.us: +; CHECK-NEXT: [[V4_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4_US]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_D_US:%.*]] +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label 
[[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US]] +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: [[A_LCSSA_US:%.*]] = phi i32 [ [[A_PHI_LCSSA10]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: [[B_LCSSA_US:%.*]] = phi i32 [ [[B_LCSSA6]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[INNER_INNER_LOOP_A:%.*]], label [[INNER_INNER_LOOP_B:%.*]] +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[LOOP_EXIT_SPLIT:%.*]], label [[INNER_INNER_LOOP_C:%.*]] +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[INNER_INNER_LOOP_EXIT_SPLIT:%.*]], label [[INNER_INNER_LOOP_C]] +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]], label [[INNER_INNER_LOOP_D:%.*]] +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN]] +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT]] +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: [[A2]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V5:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V5]], label [[INNER_LOOP_EXIT_LOOPEXIT1:%.*]], label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT]] +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label 
[[INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_exit.split: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[A_LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[A_LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: [[DOTUS_PHI2:%.*]] = phi i32 [ [[B_LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[B_LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[DOTUS_PHI]], [[DOTUS_PHI2]] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin +; The cloned copy that always exits with the adjustments required to fix up +; loop exits. +; The original copy that continues to loop. 
+ +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin + +inner_loop_exit: + br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +} + +; Same pattern as @test7a but here the original loop becomes a non-loop that +; can reach multiple exit blocks which are part of different outer loops. +define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test7b( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[A]], [[LOOP_BEGIN]] ], [ [[A2:%.*]], [[INNER_INNER_LOOP_EXIT:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_LOOP_BEGIN_SPLIT_US:%.*]], label [[INNER_LOOP_BEGIN_SPLIT:%.*]] +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[INNER_INNER_LOOP_A_US:%.*]], label [[INNER_INNER_LOOP_B_US:%.*]] +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: [[V3_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3_US]], label [[INNER_INNER_LOOP_EXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_C_US:%.*]] +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[LOOP_EXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_C_US]] +; CHECK: inner_inner_loop_c.us: +; 
CHECK-NEXT: [[V4_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4_US]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_D_US:%.*]] +; CHECK: inner_inner_loop_d.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US]] +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: [[A_LCSSA_US:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: [[B_LCSSA_US:%.*]] = phi i32 [ [[B]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[INNER_INNER_LOOP_A:%.*]], label [[INNER_INNER_LOOP_B:%.*]] +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_BEGIN]] ] +; CHECK-NEXT: [[B_LCSSA3:%.*]] = phi i32 [ [[B]], [[INNER_INNER_LOOP_BEGIN]] ] +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[LOOP_EXIT_SPLIT:%.*]], label [[INNER_INNER_LOOP_C:%.*]] +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[INNER_INNER_LOOP_EXIT_SPLIT:%.*]], label [[INNER_INNER_LOOP_C_LOOPEXIT:%.*]] +; CHECK: inner_inner_loop_c.loopexit: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_C]] +; CHECK: inner_inner_loop_c: +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]], label [[INNER_INNER_LOOP_D:%.*]] +; CHECK: inner_inner_loop_d: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT]] +; CHECK: inner_inner_loop_exit.split: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT]] +; CHECK: 
inner_inner_loop_exit: +; CHECK-NEXT: [[A2]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V5:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V5]], label [[INNER_LOOP_EXIT_LOOPEXIT1:%.*]], label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT]] +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_exit.split: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_PHI_LCSSA]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B_LCSSA3]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[A_LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[A_LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: [[DOTUS_PHI2:%.*]] = phi i32 [ [[B_LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[B_LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[DOTUS_PHI]], [[DOTUS_PHI2]] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %inner_inner_loop_c + +inner_inner_loop_b: + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c + +inner_inner_loop_c: + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d + +inner_inner_loop_d: + br i1 %cond, label %inner_inner_loop_begin, label 
%inner_loop_exit +; The cloned copy that continues looping. +; The original copy that now always exits and needs adjustments for exit +; blocks. + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v5 = load i1, i1* %ptr + br i1 %v5, label %inner_loop_exit, label %inner_loop_begin + +inner_loop_exit: + br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ] + %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ] + %result = add i32 %a.lcssa, %b.lcssa + ret i32 %result +} + +; Test that when the exit block set of an inner loop changes to start at a less +; high level of the loop nest we correctly hoist the loop up the nest. +define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8a( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[A]], [[LOOP_BEGIN]] ], [ [[A2:%.*]], [[INNER_INNER_LOOP_EXIT:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_LOOP_BEGIN_SPLIT_US:%.*]], label [[INNER_LOOP_BEGIN_SPLIT:%.*]] +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: [[A_PHI_LCSSA4:%.*]] = phi i32 [ [[A_PHI]], [[INNER_LOOP_BEGIN]] ] +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[INNER_INNER_LOOP_A_US:%.*]], label [[INNER_INNER_LOOP_B_US:%.*]] +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_LATCH_US:%.*]] +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], 
label [[INNER_INNER_LOOP_LATCH_US]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]] +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US]] +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: [[A_PHI_LCSSA2_US:%.*]] = phi i32 [ [[A_PHI_LCSSA4]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[INNER_INNER_LOOP_A:%.*]], label [[INNER_INNER_LOOP_B:%.*]] +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[INNER_INNER_LOOP_LATCH:%.*]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]] +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT]] +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN]] +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: [[A2]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[INNER_LOOP_EXIT_LOOPEXIT1:%.*]], label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: [[A_PHI_LCSSA2:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT]] +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[A_PHI_LCSSA2]], [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT]] ], [ [[A_PHI_LCSSA2_US]], [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_EXIT]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: [[A_PHI3:%.*]] = phi i32 [ [[A_PHI_LCSSA]], [[INNER_LOOP_EXIT_LOOPEXIT1]] ], [ [[DOTUS_PHI]], 
[[INNER_LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[V5:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V5]], label [[LOOP_EXIT:%.*]], label [[LOOP_BEGIN]] +; CHECK: loop_exit: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_PHI3]], [[INNER_LOOP_EXIT]] ] +; CHECK-NEXT: ret i32 [[A_LCSSA]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* %b.ptr + br label %inner_inner_loop_begin + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit + +inner_inner_loop_b: + br i1 %cond, label %inner_inner_loop_latch, label %inner_inner_loop_exit + +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is now an exit from the inner loop. +; The original region exits the loop earlier. + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + ret i32 %a.lcssa +} + +; Same pattern as @test8a but where the original loop loses an exit block and +; needs to be hoisted up the nest. 
+define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test8b( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[A]], [[LOOP_BEGIN]] ], [ [[A2:%.*]], [[INNER_INNER_LOOP_EXIT:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_LOOP_BEGIN_SPLIT_US:%.*]], label [[INNER_LOOP_BEGIN_SPLIT:%.*]] +; CHECK: inner_loop_begin.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[INNER_INNER_LOOP_A_US:%.*]], label [[INNER_INNER_LOOP_B_US:%.*]] +; CHECK: inner_inner_loop_b.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT_SPLIT_US:%.*]] +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[INNER_INNER_LOOP_LATCH_US:%.*]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]] +; CHECK: inner_inner_loop_latch.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US]] +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: [[A_PHI_LCSSA2_US:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_loop_begin.split: +; CHECK-NEXT: [[A_PHI_LCSSA4:%.*]] = phi i32 [ [[A_PHI]], [[INNER_LOOP_BEGIN]] ] +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; 
CHECK-NEXT: br i1 [[V1]], label [[INNER_INNER_LOOP_A:%.*]], label [[INNER_INNER_LOOP_B:%.*]] +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[INNER_INNER_LOOP_LATCH:%.*]], label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]] +; CHECK: inner_inner_loop_b: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_LATCH]] +; CHECK: inner_inner_loop_latch: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN]] +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: [[A2]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[INNER_LOOP_EXIT_LOOPEXIT1:%.*]], label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit.loopexit.split: +; CHECK-NEXT: [[A_PHI_LCSSA2:%.*]] = phi i32 [ [[A_PHI_LCSSA4]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT]] +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[A_PHI_LCSSA2]], [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT]] ], [ [[A_PHI_LCSSA2_US]], [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI]], [[INNER_INNER_LOOP_EXIT]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: [[A_PHI3:%.*]] = phi i32 [ [[A_PHI_LCSSA]], [[INNER_LOOP_EXIT_LOOPEXIT1]] ], [ [[DOTUS_PHI]], [[INNER_LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[V5:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V5]], label [[LOOP_EXIT:%.*]], label [[LOOP_BEGIN]] +; CHECK: loop_exit: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_PHI3]], [[INNER_LOOP_EXIT]] ] +; CHECK-NEXT: ret i32 [[A_LCSSA]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + br label %inner_loop_begin + +inner_loop_begin: + %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ] + %cond = load i1, i1* %cond.ptr + %b = load i32, i32* 
%b.ptr + br label %inner_inner_loop_begin + +inner_inner_loop_begin: + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit + +inner_inner_loop_b: + br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch + +inner_inner_loop_latch: + br label %inner_inner_loop_begin +; The cloned region is similar to before but with one earlier exit. +; The original region is now an exit in the preheader. + +inner_inner_loop_exit: + %a2 = load i32, i32* %a.ptr + %v4 = load i1, i1* %ptr + br i1 %v4, label %inner_loop_exit, label %inner_loop_begin + +inner_loop_exit: + %v5 = load i1, i1* %ptr + br i1 %v5, label %loop_exit, label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ] + ret i32 %a.lcssa +} + +; Test that requires re-forming dedicated exits for the cloned loop. +define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10a( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US:%.*]] +; CHECK: loop_begin.us: +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[LOOP_A_US:%.*]], label [[LOOP_B_US:%.*]] +; CHECK: loop_b.us: +; CHECK-NEXT: [[A_US_LCSSA:%.*]] = phi i32 [ [[A_US]], [[LOOP_BEGIN_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT_US:%.*]] +; CHECK: loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[LOOP_EXIT_SPLIT_US_LOOPEXIT:%.*]], label [[LOOP_BEGIN_BACKEDGE_US:%.*]] +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US]] +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: 
[[A_LCSSA_US_PH:%.*]] = phi i32 [ [[A_US]], [[LOOP_A_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT_US]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: [[A_LCSSA_US:%.*]] = phi i32 [ [[A_US_LCSSA]], [[LOOP_B_US]] ], [ [[A_LCSSA_US_PH]], [[LOOP_EXIT_SPLIT_US_LOOPEXIT]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[LOOP_A:%.*]], label [[LOOP_B:%.*]] +; CHECK: loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[LOOP_EXIT_SPLIT:%.*]], label [[LOOP_BEGIN_BACKEDGE:%.*]] +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_b: +; CHECK-NEXT: br label [[LOOP_BEGIN_BACKEDGE]] +; CHECK: loop_exit.split: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP_A]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[A_LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[A_LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_begin + +loop_b: + br i1 %cond, label %loop_exit, label %loop_begin +; The cloned loop with one edge as a direct exit. + +; The original loop without one 'loop_exit' edge. + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa +} + +; Test that requires re-forming dedicated exits for the original loop. 
+define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) { +; CHECK-LABEL: @test10b( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US:%.*]] +; CHECK: loop_begin.us: +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[LOOP_A_US:%.*]], label [[LOOP_B_US:%.*]] +; CHECK: loop_b.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_BACKEDGE_US:%.*]] +; CHECK: loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[LOOP_BEGIN_BACKEDGE_US]], label [[LOOP_EXIT_SPLIT_US:%.*]] +; CHECK: loop_begin.backedge.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: [[A_LCSSA_US:%.*]] = phi i32 [ [[A_US]], [[LOOP_A_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[LOOP_A:%.*]], label [[LOOP_B:%.*]] +; CHECK: loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[LOOP_BEGIN_BACKEDGE:%.*]], label [[LOOP_EXIT_SPLIT_LOOPEXIT:%.*]] +; CHECK: loop_begin.backedge: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_b: +; CHECK-NEXT: [[A_LCSSA1:%.*]] = phi i32 [ [[A]], [[LOOP_BEGIN]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT:%.*]] +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: [[A_LCSSA_PH:%.*]] = phi i32 [ [[A]], [[LOOP_A]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT]] +; CHECK: loop_exit.split: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_LCSSA1]], [[LOOP_B]] ], [ [[A_LCSSA_PH]], [[LOOP_EXIT_SPLIT_LOOPEXIT]] ] +; 
CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[A_LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[A_LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_begin, label %loop_exit + +loop_b: + br i1 %cond, label %loop_begin, label %loop_exit +; The cloned loop without one of the exits. + +; The original loop without one 'loop_exit' edge. + +loop_exit: + %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ] + ret i32 %a.lcssa +} + +; Check that if a cloned inner loop after unswitching doesn't loop and directly +; exits even an outer loop, we don't add the cloned preheader to the outer +; loop and do add the needed LCSSA phi nodes for the new exit block from the +; outer loop. +define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11a( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[LOOP_LATCH:%.*]], label [[INNER_LOOP_PH:%.*]] +; CHECK: inner_loop_ph: +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_LOOP_PH_SPLIT_US:%.*]], label [[INNER_LOOP_PH_SPLIT:%.*]] +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[INNER_LOOP_PH]] ] +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 [[B_LCSSA]]) +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]] +; CHECK: 
loop_exit.loopexit.split.us: +; CHECK-NEXT: [[A_LCSSA2_US:%.*]] = phi i32 [ [[A_US]], [[INNER_LOOP_BEGIN_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 [[B]]) +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_A:%.*]] +; CHECK: inner_loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[INNER_LOOP_EXIT:%.*]], label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: [[A_INNER_LCSSA:%.*]] = phi i32 [ [[A]], [[INNER_LOOP_A]] ] +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[LOOP_LATCH]], label [[LOOP_EXIT_LOOPEXIT1:%.*]] +; CHECK: loop_latch: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: [[A_INNER_LCSSA_LCSSA:%.*]] = phi i32 [ [[A_INNER_LCSSA]], [[INNER_LOOP_EXIT]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_LCSSA2_US]], [[LOOP_EXIT_LOOPEXIT]] ], [ [[A_INNER_LCSSA_LCSSA]], [[LOOP_EXIT_LOOPEXIT1]] ] +; CHECK-NEXT: ret i32 [[A_LCSSA]] +; +entry: + br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin + +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %loop_exit, label %inner_loop_a + +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. +; The original remains a loop losing the exit edge. 
+ +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit + +loop_latch: + br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +} + +; Check that if the original inner loop after unswitching doesn't loop and +; directly exits even an outer loop, we remove the original preheader from the +; outer loop and add needed LCSSA phi nodes for the new exit block from the +; outer loop. +define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test11b( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[LOOP_LATCH:%.*]], label [[INNER_LOOP_PH:%.*]] +; CHECK: inner_loop_ph: +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_LOOP_PH_SPLIT_US:%.*]], label [[INNER_LOOP_PH_SPLIT:%.*]] +; CHECK: inner_loop_ph.split.us: +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 [[B]]) +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_A_US:%.*]] +; CHECK: inner_loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[INNER_LOOP_EXIT_SPLIT_US:%.*]], label [[INNER_LOOP_BEGIN_US]] +; CHECK: inner_loop_exit.split.us: +; CHECK-NEXT: [[A_INNER_LCSSA_US:%.*]] = phi i32 [ [[A_US]], [[INNER_LOOP_A_US]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_ph.split: +; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[INNER_LOOP_PH]] ] +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: 
inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 [[B_LCSSA]]) +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[LOOP_LATCH]], label [[LOOP_EXIT_LOOPEXIT1:%.*]] +; CHECK: loop_latch: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_exit.loopexit: +; CHECK-NEXT: [[A_LCSSA2:%.*]] = phi i32 [ [[A]], [[INNER_LOOP_BEGIN]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: loop_exit.loopexit1: +; CHECK-NEXT: [[A_INNER_LCSSA_LCSSA:%.*]] = phi i32 [ [[A_INNER_LCSSA_US]], [[INNER_LOOP_EXIT]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_LCSSA2]], [[LOOP_EXIT_LOOPEXIT]] ], [ [[A_INNER_LCSSA_LCSSA]], [[LOOP_EXIT_LOOPEXIT1]] ] +; CHECK-NEXT: ret i32 [[A_LCSSA]] +; +entry: + br label %loop_begin + +loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_latch, label %inner_loop_ph + +inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_loop_begin + +inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_a, label %loop_exit + +inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_loop_exit, label %inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. +; The original remains a loop losing the exit edge. 
+ +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_latch, label %loop_exit + +loop_latch: + br label %loop_begin + +loop_exit: + %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +} + +; Like test11a, but checking that when the whole thing is wrapped in yet +; another loop, we correctly attribute the cloned preheader to that outermost +; loop rather than only handling the case where the preheader is not in any loop +; at all. +define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12a( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[INNER_LOOP_LATCH:%.*]], label [[INNER_INNER_LOOP_PH:%.*]] +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_INNER_LOOP_PH_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_PH_SPLIT:%.*]] +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[INNER_INNER_LOOP_PH]] ] +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 [[B_LCSSA]]) +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]] +; CHECK: inner_loop_exit.loopexit.split.us: +; CHECK-NEXT: [[A_LCSSA2_US:%.*]] = phi i32 [ [[A_US]], [[INNER_INNER_LOOP_BEGIN_US]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN:%.*]] +; CHECK: 
inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 [[B]]) +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: br label [[INNER_INNER_LOOP_A:%.*]] +; CHECK: inner_inner_loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[INNER_INNER_LOOP_EXIT:%.*]], label [[INNER_INNER_LOOP_BEGIN]] +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: [[A_INNER_INNER_LCSSA:%.*]] = phi i32 [ [[A]], [[INNER_INNER_LOOP_A]] ] +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[INNER_LOOP_LATCH]], label [[INNER_LOOP_EXIT_LOOPEXIT1:%.*]] +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: [[A_INNER_INNER_LCSSA_LCSSA:%.*]] = phi i32 [ [[A_INNER_INNER_LCSSA]], [[INNER_INNER_LOOP_EXIT]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: [[A_INNER_LCSSA:%.*]] = phi i32 [ [[A_LCSSA2_US]], [[INNER_LOOP_EXIT_LOOPEXIT]] ], [ [[A_INNER_INNER_LCSSA_LCSSA]], [[INNER_LOOP_EXIT_LOOPEXIT1]] ] +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[LOOP_BEGIN]], label [[LOOP_EXIT:%.*]] +; CHECK: loop_exit: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_INNER_LCSSA]], [[INNER_LOOP_EXIT]] ] +; CHECK-NEXT: ret i32 [[A_LCSSA]] +; +entry: + br label %loop_begin + +loop_begin: + br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin + +inner_inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label 
%inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path doesn't actually loop and is an exit from the outer loop as +; well. +; The original remains a loop losing the exit edge. + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit + +inner_loop_latch: + br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +} + +; Like test11b, but checking that when the whole thing is wrapped in yet +; another loop, we correctly sink the preheader to the outermost loop rather +; than only handling the case where the preheader is completely removed from +; a loop. +define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test12b( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_loop_begin: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[INNER_LOOP_LATCH:%.*]], label [[INNER_INNER_LOOP_PH:%.*]] +; CHECK: inner_inner_loop_ph: +; CHECK-NEXT: [[COND:%.*]] = load i1, i1* [[COND_PTR:%.*]], align 1 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[INNER_INNER_LOOP_PH_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_PH_SPLIT:%.*]] +; CHECK: inner_inner_loop_ph.split.us: +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN_US:%.*]] +; CHECK: inner_inner_loop_begin.us: +; CHECK-NEXT: call void @sink1(i32 [[B]]) +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: br label 
[[INNER_INNER_LOOP_A_US:%.*]] +; CHECK: inner_inner_loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[INNER_INNER_LOOP_EXIT_SPLIT_US:%.*]], label [[INNER_INNER_LOOP_BEGIN_US]] +; CHECK: inner_inner_loop_exit.split.us: +; CHECK-NEXT: [[A_INNER_INNER_LCSSA_US:%.*]] = phi i32 [ [[A_US]], [[INNER_INNER_LOOP_A_US]] ] +; CHECK-NEXT: br label [[INNER_INNER_LOOP_EXIT:%.*]] +; CHECK: inner_inner_loop_ph.split: +; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[INNER_INNER_LOOP_PH]] ] +; CHECK-NEXT: br label [[INNER_INNER_LOOP_BEGIN:%.*]] +; CHECK: inner_inner_loop_begin: +; CHECK-NEXT: call void @sink1(i32 [[B_LCSSA]]) +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: br label [[INNER_LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK: inner_inner_loop_exit: +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[INNER_LOOP_LATCH]], label [[INNER_LOOP_EXIT_LOOPEXIT1:%.*]] +; CHECK: inner_loop_latch: +; CHECK-NEXT: br label [[INNER_LOOP_BEGIN]] +; CHECK: inner_loop_exit.loopexit: +; CHECK-NEXT: [[A_LCSSA2:%.*]] = phi i32 [ [[A]], [[INNER_INNER_LOOP_BEGIN]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT:%.*]] +; CHECK: inner_loop_exit.loopexit1: +; CHECK-NEXT: [[A_INNER_INNER_LCSSA_LCSSA:%.*]] = phi i32 [ [[A_INNER_INNER_LCSSA_US]], [[INNER_INNER_LOOP_EXIT]] ] +; CHECK-NEXT: br label [[INNER_LOOP_EXIT]] +; CHECK: inner_loop_exit: +; CHECK-NEXT: [[A_INNER_LCSSA:%.*]] = phi i32 [ [[A_LCSSA2]], [[INNER_LOOP_EXIT_LOOPEXIT]] ], [ [[A_INNER_INNER_LCSSA_LCSSA]], [[INNER_LOOP_EXIT_LOOPEXIT1]] ] +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[LOOP_BEGIN]], label [[LOOP_EXIT:%.*]] +; CHECK: loop_exit: +; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A_INNER_LCSSA]], [[INNER_LOOP_EXIT]] ] +; CHECK-NEXT: ret i32 [[A_LCSSA]] +; +entry: + br label %loop_begin + +loop_begin: + br label %inner_loop_begin + +inner_loop_begin: + %b = load i32, 
i32* %b.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph + +inner_inner_loop_ph: + %cond = load i1, i1* %cond.ptr + br label %inner_inner_loop_begin + +inner_inner_loop_begin: + call void @sink1(i32 %b) + %a = load i32, i32* %a.ptr + br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit + +inner_inner_loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin +; The cloned path continues to loop without the exit out of the entire nest. +; The original remains a loop losing the exit edge. + +inner_inner_loop_exit: + %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ] + %v3 = load i1, i1* %ptr + br i1 %v3, label %inner_loop_latch, label %inner_loop_exit + +inner_loop_latch: + br label %inner_loop_begin + +inner_loop_exit: + %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ] + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_begin, label %loop_exit + +loop_exit: + %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ] + ret i32 %a.lcssa +} + +; Test where the cloned loop has an inner loop that has to be traversed to form +; the cloned loop, and where this inner loop has multiple blocks, and where the +; exiting block that connects the inner loop to the cloned loop is not the header +; block. This ensures that we correctly handle interesting corner cases of +; traversing back to the header when establishing the cloned loop. 
+define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13a( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US:%.*]] +; CHECK: loop_begin.us: +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[LOOP_A_US:%.*]], label [[LOOP_B_US:%.*]] +; CHECK: loop_b.us: +; CHECK-NEXT: [[B_US:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP_B_INNER_PH_US:%.*]] +; CHECK: loop_b_inner_ph.us: +; CHECK-NEXT: br label [[LOOP_B_INNER_HEADER_US:%.*]] +; CHECK: loop_b_inner_header.us: +; CHECK-NEXT: [[V3_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3_US]], label [[LOOP_B_INNER_LATCH_US:%.*]], label [[LOOP_B_INNER_BODY_US:%.*]] +; CHECK: loop_b_inner_body.us: +; CHECK-NEXT: [[V4_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4_US]], label [[LOOP_B_INNER_LATCH_US]], label [[LOOP_B_INNER_EXIT_US:%.*]] +; CHECK: loop_b_inner_exit.us: +; CHECK-NEXT: br label [[LOOP_LATCH_US:%.*]] +; CHECK: loop_b_inner_latch.us: +; CHECK-NEXT: br label [[LOOP_B_INNER_HEADER_US]] +; CHECK: loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[LOOP_EXIT_SPLIT_US:%.*]], label [[LOOP_LATCH_US]] +; CHECK: loop_latch.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: [[LCSSA_US:%.*]] = phi i32 [ [[A_US]], [[LOOP_A_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[LOOP_A:%.*]], label 
[[LOOP_B:%.*]] +; CHECK: loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[LOOP_EXIT_SPLIT_LOOPEXIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: loop_b: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR]], align 4 +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT:%.*]] +; CHECK: loop_latch: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_exit.split.loopexit: +; CHECK-NEXT: [[LCSSA_PH:%.*]] = phi i32 [ [[A]], [[LOOP_A]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT]] +; CHECK: loop_exit.split: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP_B]] ], [ [[LCSSA_PH]], [[LOOP_EXIT_SPLIT_LOOPEXIT]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_b_inner_ph, label %loop_exit + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop contains an inner loop within it. +; And the original loop no longer contains an inner loop. 
+ +loop_exit: + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +} + +; Test where the original loop has an inner loop that has to be traversed to +; rebuild the loop, and where this inner loop has multiple blocks, and where +; the exiting block that connects the inner loop to the original loop is not +; the header block. This ensures that we correctly handle interesting corner +; cases of traversing back to the header when re-establishing the original loop +; still exists after unswitching. +define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) { +; CHECK-LABEL: @test13b( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[COND:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US:%.*]] +; CHECK: loop_begin.us: +; CHECK-NEXT: [[A_US:%.*]] = load i32, i32* [[A_PTR:%.*]], align 4 +; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[V1_US]], label [[LOOP_A_US:%.*]], label [[LOOP_B_US:%.*]] +; CHECK: loop_b.us: +; CHECK-NEXT: [[B_US:%.*]] = load i32, i32* [[B_PTR:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT_US:%.*]] +; CHECK: loop_a.us: +; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2_US]], label [[LOOP_EXIT_SPLIT_US_LOOPEXIT:%.*]], label [[LOOP_LATCH_US:%.*]] +; CHECK: loop_latch.us: +; CHECK-NEXT: br label [[LOOP_BEGIN_US]] +; CHECK: loop_exit.split.us.loopexit: +; CHECK-NEXT: [[LCSSA_US_PH:%.*]] = phi i32 [ [[A_US]], [[LOOP_A_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT_SPLIT_US]] +; CHECK: loop_exit.split.us: +; CHECK-NEXT: [[LCSSA_US:%.*]] = phi i32 [ [[B_US]], [[LOOP_B_US]] ], [ [[LCSSA_US_PH]], [[LOOP_EXIT_SPLIT_US_LOOPEXIT]] ] +; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]] +; CHECK: loop_begin: +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_PTR]], align 4 +; 
CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V1]], label [[LOOP_A:%.*]], label [[LOOP_B:%.*]] +; CHECK: loop_a: +; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V2]], label [[LOOP_EXIT_SPLIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: loop_b: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR]], align 4 +; CHECK-NEXT: br label [[LOOP_B_INNER_PH:%.*]] +; CHECK: loop_b_inner_ph: +; CHECK-NEXT: br label [[LOOP_B_INNER_HEADER:%.*]] +; CHECK: loop_b_inner_header: +; CHECK-NEXT: [[V3:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V3]], label [[LOOP_B_INNER_LATCH:%.*]], label [[LOOP_B_INNER_BODY:%.*]] +; CHECK: loop_b_inner_body: +; CHECK-NEXT: [[V4:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[V4]], label [[LOOP_B_INNER_LATCH]], label [[LOOP_B_INNER_EXIT:%.*]] +; CHECK: loop_b_inner_latch: +; CHECK-NEXT: br label [[LOOP_B_INNER_HEADER]] +; CHECK: loop_b_inner_exit: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop_latch: +; CHECK-NEXT: br label [[LOOP_BEGIN]] +; CHECK: loop_exit.split: +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP_A]] ] +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop_exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[LCSSA]], [[LOOP_EXIT_SPLIT]] ], [ [[LCSSA_US]], [[LOOP_EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +entry: + br label %loop_begin + +loop_begin: + %a = load i32, i32* %a.ptr + %v1 = load i1, i1* %ptr + br i1 %v1, label %loop_a, label %loop_b + +loop_a: + %v2 = load i1, i1* %ptr + br i1 %v2, label %loop_exit, label %loop_latch + +loop_b: + %b = load i32, i32* %b.ptr + br i1 %cond, label %loop_exit, label %loop_b_inner_ph + +loop_b_inner_ph: + br label %loop_b_inner_header + +loop_b_inner_header: + %v3 = load i1, i1* %ptr + br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body + +loop_b_inner_body: + %v4 = load i1, i1* %ptr + br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit + +loop_b_inner_latch: + 
br label %loop_b_inner_header + +loop_b_inner_exit: + br label %loop_latch + +loop_latch: + br label %loop_begin +; The cloned loop doesn't contain an inner loop. +; But the original loop contains an inner loop that must be traversed. + +loop_exit: + %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ] + ret i32 %lcssa +} + +; A test reduced out of 400.perlbench that when unswitching the `%stop` +; condition clones a loop nest outside of a containing loop. This exercises a +; different cloning path from our other test cases and in turn verifying the +; resulting structure can catch any failures to correctly clone these nested +; loops. +declare void @f() +declare void @g() +declare i32 @h(i32 %arg) + +; Test that when we are unswitching and need to rebuild the loop block set we +; correctly skip past inner loops. We want to use the inner loop to efficiently +; skip whole subregions of the outer loop blocks but just because the header of +; the outer loop is also the preheader of an inner loop shouldn't confuse this +; walk. 
+define void @test23(i1 %arg, i1* %ptr) { +; CHECK-LABEL: @test23( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG_FR:%.*]] = freeze i1 [[ARG:%.*]] +; CHECK-NEXT: br i1 [[ARG_FR]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[OUTER_HEADER_US:%.*]] +; CHECK: outer.header.us: +; CHECK-NEXT: br label [[INNER_HEADER_US:%.*]] +; CHECK: inner.header.us: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: br label [[INNER_LATCH_US:%.*]] +; CHECK: inner.latch.us: +; CHECK-NEXT: [[INNER_COND_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1 +; CHECK-NEXT: br i1 [[INNER_COND_US]], label [[INNER_HEADER_US]], label [[OUTER_BODY_US:%.*]] +; CHECK: outer.body.us: +; CHECK-NEXT: br label [[OUTER_BODY_LEFT_US:%.*]] +; CHECK: outer.body.left.us: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: br label [[OUTER_LATCH_US:%.*]] +; CHECK: outer.latch.us: +; CHECK-NEXT: [[OUTER_COND_US:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[OUTER_COND_US]], label [[OUTER_HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: br label [[INNER_LATCH:%.*]] +; CHECK: inner.latch: +; CHECK-NEXT: [[INNER_COND:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[OUTER_BODY:%.*]] +; CHECK: outer.body: +; CHECK-NEXT: br label [[OUTER_BODY_RIGHT:%.*]] +; CHECK: outer.body.right: +; CHECK-NEXT: call void @g() +; CHECK-NEXT: br label [[OUTER_LATCH:%.*]] +; CHECK: outer.latch: +; CHECK-NEXT: [[OUTER_COND:%.*]] = load i1, i1* [[PTR]], align 1 +; CHECK-NEXT: br i1 [[OUTER_COND]], label [[OUTER_HEADER]], label [[EXIT_SPLIT:%.*]] +; CHECK: exit.split: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label 
%outer.header +; Just verify that we unswitched the correct bits. We should call `@f` twice in +; one unswitch and `@f` and then `@g` in the other. + +outer.header: + br label %inner.header + +inner.header: + call void @f() + br label %inner.latch + +inner.latch: + %inner.cond = load i1, i1* %ptr + br i1 %inner.cond, label %inner.header, label %outer.body + +outer.body: + br i1 %arg, label %outer.body.left, label %outer.body.right + +outer.body.left: + call void @f() + br label %outer.latch + +outer.body.right: + call void @g() + br label %outer.latch + +outer.latch: + %outer.cond = load i1, i1* %ptr + br i1 %outer.cond, label %outer.header, label %exit + +exit: + ret void +} + +; A test case designed to exercise unusual properties of switches: they +; can introduce multiple edges to successors. These need lots of special case +; handling as they get collapsed in many cases (domtree, the unswitch itself) +; but not in all cases (the PHI node operands). +define i32 @test29(i32 %arg) { +; CHECK-LABEL: @test29( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG_FR:%.*]] = freeze i32 [[ARG:%.*]] +; CHECK-NEXT: switch i32 [[ARG_FR]], label [[ENTRY_SPLIT:%.*]] [ +; CHECK-NEXT: i32 0, label [[ENTRY_SPLIT_US:%.*]] +; CHECK-NEXT: i32 1, label [[ENTRY_SPLIT_US]] +; CHECK-NEXT: i32 2, label [[ENTRY_SPLIT_US1:%.*]] +; CHECK-NEXT: i32 3, label [[ENTRY_SPLIT]] +; CHECK-NEXT: ] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[HEADER_US:%.*]] +; CHECK: header.us: +; CHECK-NEXT: [[TMP_US:%.*]] = call i32 @d() +; CHECK-NEXT: [[CMP1_US:%.*]] = icmp eq i32 [[TMP_US]], 0 +; CHECK-NEXT: br i1 [[CMP1_US]], label [[BODY_A_US:%.*]], label [[DISPATCH_US:%.*]] +; CHECK: dispatch.us: +; CHECK-NEXT: br label [[BODY_A_US]] +; CHECK: body.a.us: +; CHECK-NEXT: [[TMP_A_PHI_US:%.*]] = phi i32 [ 0, [[HEADER_US]] ], [ [[TMP_US]], [[DISPATCH_US]] ] +; CHECK-NEXT: [[TMP_A_US:%.*]] = call i32 @a() +; CHECK-NEXT: [[TMP_A_SUM_US:%.*]] = add i32 [[TMP_A_PHI_US]], [[TMP_A_US]] +; CHECK-NEXT: br label 
[[BODY_B_US:%.*]] +; CHECK: body.b.us: +; CHECK-NEXT: [[TMP_B_PHI_US:%.*]] = phi i32 [ [[TMP_A_SUM_US]], [[BODY_A_US]] ] +; CHECK-NEXT: [[TMP_B_US:%.*]] = call i32 @b() +; CHECK-NEXT: [[TMP_B_SUM_US:%.*]] = add i32 [[TMP_B_PHI_US]], [[TMP_B_US]] +; CHECK-NEXT: br label [[BODY_C_US:%.*]] +; CHECK: body.c.us: +; CHECK-NEXT: [[TMP_C_PHI_US:%.*]] = phi i32 [ [[TMP_B_SUM_US]], [[BODY_B_US]] ] +; CHECK-NEXT: [[TMP_C_US:%.*]] = call i32 @c() +; CHECK-NEXT: [[TMP_C_SUM_US:%.*]] = add i32 [[TMP_C_PHI_US]], [[TMP_C_US]] +; CHECK-NEXT: br label [[LATCH_US:%.*]] +; CHECK: latch.us: +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i32 [[TMP_C_SUM_US]], 42 +; CHECK-NEXT: br i1 [[CMP2_US]], label [[HEADER_US]], label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: [[LCSSA_PHI_US:%.*]] = phi i32 [ [[TMP_C_SUM_US]], [[LATCH_US]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.split.us1: +; CHECK-NEXT: br label [[HEADER_US2:%.*]] +; CHECK: header.us2: +; CHECK-NEXT: [[TMP_US3:%.*]] = call i32 @d() +; CHECK-NEXT: [[CMP1_US4:%.*]] = icmp eq i32 [[TMP_US3]], 0 +; CHECK-NEXT: br i1 [[CMP1_US4]], label [[BODY_A_US6:%.*]], label [[DISPATCH_US5:%.*]] +; CHECK: dispatch.us5: +; CHECK-NEXT: br label [[BODY_B_US10:%.*]] +; CHECK: body.a.us6: +; CHECK-NEXT: [[TMP_A_PHI_US7:%.*]] = phi i32 [ 0, [[HEADER_US2]] ] +; CHECK-NEXT: [[TMP_A_US8:%.*]] = call i32 @a() +; CHECK-NEXT: [[TMP_A_SUM_US9:%.*]] = add i32 [[TMP_A_PHI_US7]], [[TMP_A_US8]] +; CHECK-NEXT: br label [[BODY_B_US10]] +; CHECK: body.b.us10: +; CHECK-NEXT: [[TMP_B_PHI_US11:%.*]] = phi i32 [ [[TMP_US3]], [[DISPATCH_US5]] ], [ [[TMP_A_SUM_US9]], [[BODY_A_US6]] ] +; CHECK-NEXT: [[TMP_B_US12:%.*]] = call i32 @b() +; CHECK-NEXT: [[TMP_B_SUM_US13:%.*]] = add i32 [[TMP_B_PHI_US11]], [[TMP_B_US12]] +; CHECK-NEXT: br label [[BODY_C_US14:%.*]] +; CHECK: body.c.us14: +; CHECK-NEXT: [[TMP_C_PHI_US15:%.*]] = phi i32 [ [[TMP_B_SUM_US13]], [[BODY_B_US10]] ] +; CHECK-NEXT: [[TMP_C_US16:%.*]] = call i32 @c() +; CHECK-NEXT: 
[[TMP_C_SUM_US17:%.*]] = add i32 [[TMP_C_PHI_US15]], [[TMP_C_US16]] +; CHECK-NEXT: br label [[LATCH_US18:%.*]] +; CHECK: latch.us18: +; CHECK-NEXT: [[CMP2_US19:%.*]] = icmp slt i32 [[TMP_C_SUM_US17]], 42 +; CHECK-NEXT: br i1 [[CMP2_US19]], label [[HEADER_US2]], label [[EXIT_SPLIT_SPLIT_US:%.*]] +; CHECK: exit.split.split.us: +; CHECK-NEXT: [[LCSSA_PHI_US20:%.*]] = phi i32 [ [[TMP_C_SUM_US17]], [[LATCH_US18]] ] +; CHECK-NEXT: br label [[EXIT_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @d() +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[BODY_A:%.*]], label [[DISPATCH:%.*]] +; CHECK: dispatch: +; CHECK-NEXT: br label [[BODY_C:%.*]] +; CHECK: body.a: +; CHECK-NEXT: [[TMP_A_PHI:%.*]] = phi i32 [ 0, [[HEADER]] ] +; CHECK-NEXT: [[TMP_A:%.*]] = call i32 @a() +; CHECK-NEXT: [[TMP_A_SUM:%.*]] = add i32 [[TMP_A_PHI]], [[TMP_A]] +; CHECK-NEXT: br label [[BODY_B:%.*]] +; CHECK: body.b: +; CHECK-NEXT: [[TMP_B_PHI:%.*]] = phi i32 [ [[TMP_A_SUM]], [[BODY_A]] ] +; CHECK-NEXT: [[TMP_B:%.*]] = call i32 @b() +; CHECK-NEXT: [[TMP_B_SUM:%.*]] = add i32 [[TMP_B_PHI]], [[TMP_B]] +; CHECK-NEXT: br label [[BODY_C]] +; CHECK: body.c: +; CHECK-NEXT: [[TMP_C_PHI:%.*]] = phi i32 [ [[TMP]], [[DISPATCH]] ], [ [[TMP_B_SUM]], [[BODY_B]] ] +; CHECK-NEXT: [[TMP_C:%.*]] = call i32 @c() +; CHECK-NEXT: [[TMP_C_SUM:%.*]] = add i32 [[TMP_C_PHI]], [[TMP_C]] +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP_C_SUM]], 42 +; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER]], label [[EXIT_SPLIT_SPLIT:%.*]] +; CHECK: exit.split.split: +; CHECK-NEXT: [[LCSSA_PHI:%.*]] = phi i32 [ [[TMP_C_SUM]], [[LATCH]] ] +; CHECK-NEXT: br label [[EXIT_SPLIT]] +; CHECK: exit.split: +; CHECK-NEXT: [[DOTUS_PHI21:%.*]] = phi i32 [ [[LCSSA_PHI]], [[EXIT_SPLIT_SPLIT]] ], [ [[LCSSA_PHI_US20]], [[EXIT_SPLIT_SPLIT_US]] ] +; CHECK-NEXT: br label [[EXIT]] +; 
CHECK: exit: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[DOTUS_PHI21]], [[EXIT_SPLIT]] ], [ [[LCSSA_PHI_US]], [[EXIT_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +entry: + br label %header + +header: + %tmp = call i32 @d() + %cmp1 = icmp eq i32 %tmp, 0 + ; We set up a chain through all the successors of the switch that doesn't + ; involve the switch so that we can have interesting PHI nodes in them. + br i1 %cmp1, label %body.a, label %dispatch + +dispatch: + ; Switch with multiple successors. We arrange the last successor to be the + ; default to make the test case easier to read. This has a duplicate edge + ; both to the default destination (which is completely superfluous but + ; technically valid IR) and to a regular successor. + switch i32 %arg, label %body.c [ + i32 0, label %body.a + i32 1, label %body.a + i32 2, label %body.b + i32 3, label %body.c + ] + +body.a: + %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ], [ %tmp, %dispatch ] + %tmp.a = call i32 @a() + %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a + br label %body.b +; Unswitched 'a' loop. + +body.b: + %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ] + %tmp.b = call i32 @b() + %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b + br label %body.c +; Unswitched 'b' loop. + +body.c: + %tmp.c.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.b.sum, %body.b ] + %tmp.c = call i32 @c() + %tmp.c.sum = add i32 %tmp.c.phi, %tmp.c + br label %latch +; Unswitched 'c' loop. + +latch: + %cmp2 = icmp slt i32 %tmp.c.sum, 42 + br i1 %cmp2, label %header, label %exit + +exit: + %lcssa.phi = phi i32 [ %tmp.c.sum, %latch ] + ret i32 %lcssa.phi + +} + +; Similar to @test29 but designed to have one of the duplicate edges be +; a loop exit edge as those can in some cases be special. Among other things, +; this includes an LCSSA phi with multiple entries despite being a dedicated +; exit block. 
+define i32 @test30(i32 %arg) { +; CHECK-LABEL: @test30( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG_FR:%.*]] = freeze i32 [[ARG:%.*]] +; CHECK-NEXT: switch i32 [[ARG_FR]], label [[ENTRY_SPLIT:%.*]] [ +; CHECK-NEXT: i32 -1, label [[ENTRY_SPLIT]] +; CHECK-NEXT: i32 0, label [[ENTRY_SPLIT_US:%.*]] +; CHECK-NEXT: i32 1, label [[ENTRY_SPLIT_US1:%.*]] +; CHECK-NEXT: i32 2, label [[ENTRY_SPLIT_US1]] +; CHECK-NEXT: ] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[HEADER_US:%.*]] +; CHECK: header.us: +; CHECK-NEXT: [[TMP_US:%.*]] = call i32 @d() +; CHECK-NEXT: [[CMP1_US:%.*]] = icmp eq i32 [[TMP_US]], 0 +; CHECK-NEXT: br i1 [[CMP1_US]], label [[BODY_A_US:%.*]], label [[DISPATCH_US:%.*]] +; CHECK: dispatch.us: +; CHECK-NEXT: br label [[BODY_A_US]] +; CHECK: body.a.us: +; CHECK-NEXT: [[TMP_A_PHI_US:%.*]] = phi i32 [ 0, [[HEADER_US]] ], [ [[TMP_US]], [[DISPATCH_US]] ] +; CHECK-NEXT: [[TMP_A_US:%.*]] = call i32 @a() +; CHECK-NEXT: [[TMP_A_SUM_US:%.*]] = add i32 [[TMP_A_PHI_US]], [[TMP_A_US]] +; CHECK-NEXT: br label [[BODY_B_US:%.*]] +; CHECK: body.b.us: +; CHECK-NEXT: [[TMP_B_PHI_US:%.*]] = phi i32 [ [[TMP_A_SUM_US]], [[BODY_A_US]] ] +; CHECK-NEXT: [[TMP_B_US:%.*]] = call i32 @b() +; CHECK-NEXT: [[TMP_B_SUM_US:%.*]] = add i32 [[TMP_B_PHI_US]], [[TMP_B_US]] +; CHECK-NEXT: br label [[LATCH_US:%.*]] +; CHECK: latch.us: +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i32 [[TMP_B_SUM_US]], 42 +; CHECK-NEXT: br i1 [[CMP2_US]], label [[HEADER_US]], label [[LOOP_EXIT2_SPLIT_US:%.*]] +; CHECK: loop.exit2.split.us: +; CHECK-NEXT: [[L2_PHI_US:%.*]] = phi i32 [ [[TMP_B_SUM_US]], [[LATCH_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT2:%.*]] +; CHECK: entry.split.us1: +; CHECK-NEXT: br label [[HEADER_US2:%.*]] +; CHECK: header.us2: +; CHECK-NEXT: [[TMP_US3:%.*]] = call i32 @d() +; CHECK-NEXT: [[CMP1_US4:%.*]] = icmp eq i32 [[TMP_US3]], 0 +; CHECK-NEXT: br i1 [[CMP1_US4]], label [[BODY_A_US6:%.*]], label [[DISPATCH_US5:%.*]] +; CHECK: dispatch.us5: +; CHECK-NEXT: br label [[BODY_B_US10:%.*]] +; 
CHECK: body.a.us6: +; CHECK-NEXT: [[TMP_A_PHI_US7:%.*]] = phi i32 [ 0, [[HEADER_US2]] ] +; CHECK-NEXT: [[TMP_A_US8:%.*]] = call i32 @a() +; CHECK-NEXT: [[TMP_A_SUM_US9:%.*]] = add i32 [[TMP_A_PHI_US7]], [[TMP_A_US8]] +; CHECK-NEXT: br label [[BODY_B_US10]] +; CHECK: body.b.us10: +; CHECK-NEXT: [[TMP_B_PHI_US11:%.*]] = phi i32 [ [[TMP_US3]], [[DISPATCH_US5]] ], [ [[TMP_A_SUM_US9]], [[BODY_A_US6]] ] +; CHECK-NEXT: [[TMP_B_US12:%.*]] = call i32 @b() +; CHECK-NEXT: [[TMP_B_SUM_US13:%.*]] = add i32 [[TMP_B_PHI_US11]], [[TMP_B_US12]] +; CHECK-NEXT: br label [[LATCH_US14:%.*]] +; CHECK: latch.us14: +; CHECK-NEXT: [[CMP2_US15:%.*]] = icmp slt i32 [[TMP_B_SUM_US13]], 42 +; CHECK-NEXT: br i1 [[CMP2_US15]], label [[HEADER_US2]], label [[LOOP_EXIT2_SPLIT_SPLIT_US:%.*]] +; CHECK: loop.exit2.split.split.us: +; CHECK-NEXT: [[L2_PHI_US16:%.*]] = phi i32 [ [[TMP_B_SUM_US13]], [[LATCH_US14]] ] +; CHECK-NEXT: br label [[LOOP_EXIT2_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @d() +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[BODY_A:%.*]], label [[DISPATCH:%.*]] +; CHECK: dispatch: +; CHECK-NEXT: [[TMP_LCSSA:%.*]] = phi i32 [ [[TMP]], [[HEADER]] ] +; CHECK-NEXT: br label [[LOOP_EXIT1:%.*]] +; CHECK: body.a: +; CHECK-NEXT: [[TMP_A_PHI:%.*]] = phi i32 [ 0, [[HEADER]] ] +; CHECK-NEXT: [[TMP_A:%.*]] = call i32 @a() +; CHECK-NEXT: [[TMP_A_SUM:%.*]] = add i32 [[TMP_A_PHI]], [[TMP_A]] +; CHECK-NEXT: br label [[BODY_B:%.*]] +; CHECK: body.b: +; CHECK-NEXT: [[TMP_B_PHI:%.*]] = phi i32 [ [[TMP_A_SUM]], [[BODY_A]] ] +; CHECK-NEXT: [[TMP_B:%.*]] = call i32 @b() +; CHECK-NEXT: [[TMP_B_SUM:%.*]] = add i32 [[TMP_B_PHI]], [[TMP_B]] +; CHECK-NEXT: br label [[LATCH:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP_B_SUM]], 42 +; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER]], label [[LOOP_EXIT2_SPLIT_SPLIT:%.*]] +; CHECK: loop.exit1: +; CHECK-NEXT: 
[[L1_PHI:%.*]] = phi i32 [ [[TMP_LCSSA]], [[DISPATCH]] ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: loop.exit2.split.split: +; CHECK-NEXT: [[L2_PHI:%.*]] = phi i32 [ [[TMP_B_SUM]], [[LATCH]] ] +; CHECK-NEXT: br label [[LOOP_EXIT2_SPLIT]] +; CHECK: loop.exit2.split: +; CHECK-NEXT: [[DOTUS_PHI17:%.*]] = phi i32 [ [[L2_PHI]], [[LOOP_EXIT2_SPLIT_SPLIT]] ], [ [[L2_PHI_US16]], [[LOOP_EXIT2_SPLIT_SPLIT_US]] ] +; CHECK-NEXT: br label [[LOOP_EXIT2]] +; CHECK: loop.exit2: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[DOTUS_PHI17]], [[LOOP_EXIT2_SPLIT]] ], [ [[L2_PHI_US]], [[LOOP_EXIT2_SPLIT_US]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[L_PHI:%.*]] = phi i32 [ [[L1_PHI]], [[LOOP_EXIT1]] ], [ [[DOTUS_PHI]], [[LOOP_EXIT2]] ] +; CHECK-NEXT: ret i32 [[L_PHI]] +; +entry: + br label %header + +header: + %tmp = call i32 @d() + %cmp1 = icmp eq i32 %tmp, 0 + br i1 %cmp1, label %body.a, label %dispatch + +dispatch: + switch i32 %arg, label %loop.exit1 [ + i32 -1, label %loop.exit1 + i32 0, label %body.a + i32 1, label %body.b + i32 2, label %body.b + ] + +body.a: + %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ] + %tmp.a = call i32 @a() + %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a + br label %body.b +; Unswitched 'a' loop. + +body.b: + %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ] + %tmp.b = call i32 @b() + %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b + br label %latch +; Unswitched 'b' loop. + +latch: + %cmp2 = icmp slt i32 %tmp.b.sum, 42 + br i1 %cmp2, label %header, label %loop.exit2 + +loop.exit1: + %l1.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ] + br label %exit +; Unswitched 'exit' loop. 
+ +loop.exit2: + %l2.phi = phi i32 [ %tmp.b.sum, %latch ] + br label %exit + +exit: + %l.phi = phi i32 [ %l1.phi, %loop.exit1 ], [ %l2.phi, %loop.exit2 ] + ret i32 %l.phi +} + +; Unswitch will not actually change the loop nest from: +; A < B < C +define void @hoist_inner_loop0() { +; CHECK-LABEL: @hoist_inner_loop0( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: [[V1:%.*]] = call i1 @cond() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i1 [[V1]] +; CHECK-NEXT: br i1 [[V1_FR]], label [[B_HEADER_SPLIT_US:%.*]], label [[B_HEADER_SPLIT:%.*]] +; CHECK: b.header.split.us: +; CHECK-NEXT: br label [[C_HEADER_US:%.*]] +; CHECK: c.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[B_LATCH_SPLIT_US:%.*]] +; CHECK: b.latch.split.us: +; CHECK-NEXT: br label [[B_LATCH:%.*]] +; CHECK: b.header.split: +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[C_LATCH:%.*]] +; CHECK: c.latch: +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[C_HEADER]], label [[B_LATCH_SPLIT:%.*]] +; CHECK: b.latch.split: +; CHECK-NEXT: br label [[B_LATCH]] +; CHECK: b.latch: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[B_HEADER]], label [[A_LATCH:%.*]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %a.header + +a.header: + br label %b.header + +b.header: + %v1 = call i1 @cond() + br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.latch + +c.latch: + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %b.latch + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.latch + +a.latch: + br label %a.header + +exit: + ret void +} + +; Unswitch will transform the loop nest from: +; A < B < C +; into +; A < 
(B, C) +define void @hoist_inner_loop1(i32* %ptr) { +; CHECK-LABEL: @hoist_inner_loop1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: [[X_A:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: [[X_B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = call i1 @cond() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i1 [[V1]] +; CHECK-NEXT: br i1 [[V1_FR]], label [[B_HEADER_SPLIT_US:%.*]], label [[B_HEADER_SPLIT:%.*]] +; CHECK: b.header.split.us: +; CHECK-NEXT: br label [[C_HEADER_US:%.*]] +; CHECK: c.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[B_LATCH_SPLIT_US:%.*]] +; CHECK: b.latch.split.us: +; CHECK-NEXT: br label [[B_LATCH:%.*]] +; CHECK: b.header.split: +; CHECK-NEXT: [[X_B_LCSSA:%.*]] = phi i32 [ [[X_B]], [[B_HEADER]] ] +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[C_LATCH:%.*]] +; CHECK: c.latch: +; CHECK-NEXT: store i32 [[X_A]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_B_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[C_HEADER]], label [[A_EXIT_C:%.*]] +; CHECK: b.latch: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[B_HEADER]], label [[A_EXIT_B:%.*]] +; CHECK: a.exit.c: +; CHECK-NEXT: br label [[A_LATCH:%.*]] +; CHECK: a.exit.b: +; CHECK-NEXT: br label [[A_LATCH]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.latch + +c.latch: + ; Use values from other loops to check LCSSA form. 
+ store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %a.exit.c + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.exit.b + +a.exit.c: + br label %a.latch + +a.exit.b: + br label %a.latch + +a.latch: + br label %a.header + +exit: + ret void +} + +; Unswitch will transform the loop nest from: +; A < B < C +; into +; (A < B), C +define void @hoist_inner_loop2(i32* %ptr) { +; CHECK-LABEL: @hoist_inner_loop2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: [[X_A:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: [[X_B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = call i1 @cond() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i1 [[V1]] +; CHECK-NEXT: br i1 [[V1_FR]], label [[B_HEADER_SPLIT_US:%.*]], label [[B_HEADER_SPLIT:%.*]] +; CHECK: b.header.split.us: +; CHECK-NEXT: br label [[C_HEADER_US:%.*]] +; CHECK: c.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[B_LATCH_SPLIT_US:%.*]] +; CHECK: b.latch.split.us: +; CHECK-NEXT: br label [[B_LATCH:%.*]] +; CHECK: b.header.split: +; CHECK-NEXT: [[X_A_LCSSA:%.*]] = phi i32 [ [[X_A]], [[B_HEADER]] ] +; CHECK-NEXT: [[X_B_LCSSA:%.*]] = phi i32 [ [[X_B]], [[B_HEADER]] ] +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[C_LATCH:%.*]] +; CHECK: c.latch: +; CHECK-NEXT: store i32 [[X_A_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_B_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[C_HEADER]], label [[EXIT:%.*]] +; CHECK: b.latch: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[B_HEADER]], label [[A_LATCH:%.*]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret 
void +; +entry: + br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.latch + +c.latch: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %exit + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.latch + +a.latch: + br label %a.header + +exit: + ret void +} + +; Same as @hoist_inner_loop2 but with a nested loop inside the hoisted loop. +; Unswitch will transform the loop nest from: +; A < B < C < D +; into +; (A < B), (C < D) +define void @hoist_inner_loop3(i32* %ptr) { +; CHECK-LABEL: @hoist_inner_loop3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: [[X_A:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: [[X_B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = call i1 @cond() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i1 [[V1]] +; CHECK-NEXT: br i1 [[V1_FR]], label [[B_HEADER_SPLIT_US:%.*]], label [[B_HEADER_SPLIT:%.*]] +; CHECK: b.header.split.us: +; CHECK-NEXT: br label [[C_HEADER_US:%.*]] +; CHECK: c.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[B_LATCH_SPLIT_US:%.*]] +; CHECK: b.latch.split.us: +; CHECK-NEXT: br label [[B_LATCH:%.*]] +; CHECK: b.header.split: +; CHECK-NEXT: [[X_A_LCSSA:%.*]] = phi i32 [ [[X_A]], [[B_HEADER]] ] +; CHECK-NEXT: [[X_B_LCSSA:%.*]] = phi i32 [ [[X_B]], [[B_HEADER]] ] +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[C_BODY:%.*]] +; CHECK: c.body: +; CHECK-NEXT: [[X_C:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[D_HEADER:%.*]] +; CHECK: d.header: 
+; CHECK-NEXT: store i32 [[X_A_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_B_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_C]], i32* [[PTR]], align 4 +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[D_HEADER]], label [[C_LATCH:%.*]] +; CHECK: c.latch: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[C_HEADER]], label [[EXIT:%.*]] +; CHECK: b.latch: +; CHECK-NEXT: [[V4:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V4]], label [[B_HEADER]], label [[A_LATCH:%.*]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %c.header + +c.header: + call i32 @c() + br i1 %v1, label %b.latch, label %c.body + +c.body: + %x.c = load i32, i32* %ptr + br label %d.header + +d.header: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + store i32 %x.c, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %d.header, label %c.latch + +c.latch: + %v3 = call i1 @cond() + br i1 %v3, label %c.header, label %exit + +b.latch: + %v4 = call i1 @cond() + br i1 %v4, label %b.header, label %a.latch + +a.latch: + br label %a.header + +exit: + ret void +} + +; This test is designed to exercise checking multiple remaining exits from the +; loop being unswitched. 
+; Unswitch will transform the loop nest from: +; A < B < C < D +; into +; A < B < (C, D) +define void @hoist_inner_loop4() { +; CHECK-LABEL: @hoist_inner_loop4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[V1:%.*]] = call i1 @cond() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i1 [[V1]] +; CHECK-NEXT: br i1 [[V1_FR]], label [[C_HEADER_SPLIT_US:%.*]], label [[C_HEADER_SPLIT:%.*]] +; CHECK: c.header.split.us: +; CHECK-NEXT: br label [[D_HEADER_US:%.*]] +; CHECK: d.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @d() +; CHECK-NEXT: br label [[C_LATCH_SPLIT_US:%.*]] +; CHECK: c.latch.split.us: +; CHECK-NEXT: br label [[C_LATCH:%.*]] +; CHECK: c.header.split: +; CHECK-NEXT: br label [[D_HEADER:%.*]] +; CHECK: d.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @d() +; CHECK-NEXT: br label [[D_EXITING1:%.*]] +; CHECK: d.exiting1: +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[D_EXITING2:%.*]], label [[A_LATCH:%.*]] +; CHECK: d.exiting2: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[D_EXITING3:%.*]], label [[LOOPEXIT_D:%.*]] +; CHECK: d.exiting3: +; CHECK-NEXT: [[V4:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V4]], label [[D_LATCH:%.*]], label [[B_LATCH:%.*]] +; CHECK: d.latch: +; CHECK-NEXT: br label [[D_HEADER]] +; CHECK: c.latch: +; CHECK-NEXT: [[V5:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V5]], label [[C_HEADER]], label [[LOOPEXIT_C:%.*]] +; CHECK: b.latch: +; CHECK-NEXT: br label [[B_HEADER]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: loopexit.d: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: loopexit.c: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %a.header + +a.header: + br label %b.header + +b.header: + br label %c.header + +c.header: + %v1 = call 
i1 @cond() + br label %d.header + +d.header: + call i32 @d() + br i1 %v1, label %c.latch, label %d.exiting1 + +d.exiting1: + %v2 = call i1 @cond() + br i1 %v2, label %d.exiting2, label %a.latch + +d.exiting2: + %v3 = call i1 @cond() + br i1 %v3, label %d.exiting3, label %loopexit.d + +d.exiting3: + %v4 = call i1 @cond() + br i1 %v4, label %d.latch, label %b.latch + +d.latch: + br label %d.header + +c.latch: + %v5 = call i1 @cond() + br i1 %v5, label %c.header, label %loopexit.c + +b.latch: + br label %b.header + +a.latch: + br label %a.header + +loopexit.d: + br label %exit + +loopexit.c: + br label %exit + +exit: + ret void +} + +; Unswitch will transform the loop nest from: +; A < B < C < D +; into +; A < ((B < C), D) +define void @hoist_inner_loop5(i32* %ptr) { +; CHECK-LABEL: @hoist_inner_loop5( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: [[X_A:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: [[X_B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[X_C:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = call i1 @cond() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i1 [[V1]] +; CHECK-NEXT: br i1 [[V1_FR]], label [[C_HEADER_SPLIT_US:%.*]], label [[C_HEADER_SPLIT:%.*]] +; CHECK: c.header.split.us: +; CHECK-NEXT: br label [[D_HEADER_US:%.*]] +; CHECK: d.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @d() +; CHECK-NEXT: br label [[C_LATCH_SPLIT_US:%.*]] +; CHECK: c.latch.split.us: +; CHECK-NEXT: br label [[C_LATCH:%.*]] +; CHECK: c.header.split: +; CHECK-NEXT: [[X_B_LCSSA:%.*]] = phi i32 [ [[X_B]], [[C_HEADER]] ] +; CHECK-NEXT: [[X_C_LCSSA:%.*]] = phi i32 [ [[X_C]], [[C_HEADER]] ] +; CHECK-NEXT: br label [[D_HEADER:%.*]] +; CHECK: d.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @d() +; CHECK-NEXT: br label [[D_LATCH:%.*]] +; CHECK: d.latch: +; CHECK-NEXT: store i32 
[[X_A]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_B_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_C_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[D_HEADER]], label [[A_LATCH:%.*]] +; CHECK: c.latch: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[C_HEADER]], label [[B_LATCH:%.*]] +; CHECK: b.latch: +; CHECK-NEXT: br label [[B_HEADER]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + br label %c.header + +c.header: + %x.c = load i32, i32* %ptr + %v1 = call i1 @cond() + br label %d.header + +d.header: + call i32 @d() + br i1 %v1, label %c.latch, label %d.latch + +d.latch: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + store i32 %x.c, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %d.header, label %a.latch + +c.latch: + %v3 = call i1 @cond() + br i1 %v3, label %c.header, label %b.latch + +b.latch: + br label %b.header + +a.latch: + br label %a.header + +exit: + ret void +} + +define void @hoist_inner_loop_switch(i32* %ptr) { +; CHECK-LABEL: @hoist_inner_loop_switch( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[A_HEADER:%.*]] +; CHECK: a.header: +; CHECK-NEXT: [[X_A:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[B_HEADER:%.*]] +; CHECK: b.header: +; CHECK-NEXT: [[X_B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[V1:%.*]] = call i32 @cond.i32() +; CHECK-NEXT: [[V1_FR:%.*]] = freeze i32 [[V1]] +; CHECK-NEXT: switch i32 [[V1_FR]], label [[B_HEADER_SPLIT:%.*]] [ +; CHECK-NEXT: i32 1, label [[B_HEADER_SPLIT_US:%.*]] +; CHECK-NEXT: i32 2, label [[B_HEADER_SPLIT_US]] +; CHECK-NEXT: i32 3, label [[B_HEADER_SPLIT_US]] +; CHECK-NEXT: ] +; CHECK: b.header.split.us: +; 
CHECK-NEXT: br label [[C_HEADER_US:%.*]] +; CHECK: c.header.us: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[B_LATCH_SPLIT_US:%.*]] +; CHECK: b.latch.split.us: +; CHECK-NEXT: br label [[B_LATCH:%.*]] +; CHECK: b.header.split: +; CHECK-NEXT: [[X_A_LCSSA:%.*]] = phi i32 [ [[X_A]], [[B_HEADER]] ] +; CHECK-NEXT: [[X_B_LCSSA:%.*]] = phi i32 [ [[X_B]], [[B_HEADER]] ] +; CHECK-NEXT: br label [[C_HEADER:%.*]] +; CHECK: c.header: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @c() +; CHECK-NEXT: br label [[C_LATCH:%.*]] +; CHECK: c.latch: +; CHECK-NEXT: store i32 [[X_A_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[X_B_LCSSA]], i32* [[PTR]], align 4 +; CHECK-NEXT: [[V2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V2]], label [[C_HEADER]], label [[EXIT:%.*]] +; CHECK: b.latch: +; CHECK-NEXT: [[V3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[V3]], label [[B_HEADER]], label [[A_LATCH:%.*]] +; CHECK: a.latch: +; CHECK-NEXT: br label [[A_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %a.header + +a.header: + %x.a = load i32, i32* %ptr + br label %b.header + +b.header: + %x.b = load i32, i32* %ptr + %v1 = call i32 @cond.i32() + br label %c.header + +c.header: + call i32 @c() + switch i32 %v1, label %c.latch [ + i32 1, label %b.latch + i32 2, label %b.latch + i32 3, label %b.latch + ] + +c.latch: + ; Use values from other loops to check LCSSA form. + store i32 %x.a, i32* %ptr + store i32 %x.b, i32* %ptr + %v2 = call i1 @cond() + br i1 %v2, label %c.header, label %exit + +b.latch: + %v3 = call i1 @cond() + br i1 %v3, label %b.header, label %a.latch + +a.latch: + br label %a.header + +exit: + ret void +}