From ef51eed37b7ed67b3c0e5f70fa61d681ba21787d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 22 Jan 2021 16:31:29 -0800 Subject: [PATCH] [LoopDeletion] Handle inner loops w/untaken backedges This builds on the restricted form of D93906 that was relanded after the initial revert, and adds back support for breaking backedges of inner loops. It turns out the original invalidation logic wasn't quite right, specifically around the handling of LCSSA. When breaking the backedge of an inner loop, we can cause blocks which were in the outer loop only because they were also included in a sub-loop to be removed from both loops. This results in the exit block set for our original parent loop changing, and thus a need for new LCSSA phi nodes. This case happens when the inner loop has an exit block which is also an exit block of the parent, and there's a block in the child which reaches an exit to said block without also reaching an exit to the parent loop. (I'm describing this in terms of the immediate parent, but the problem is general for any transitive parent in the nest.) The approach implemented here involves a potentially expensive LCSSA rebuild. Perf testing during review didn't show anything concerning, but we may end up needing to revert this if anyone encounters a practical compile time issue.
Differential Revision: https://reviews.llvm.org/D94378 --- llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 8 -------- llvm/lib/Transforms/Utils/LoopUtils.cpp | 17 +++++++++++++++-- .../Transforms/IndVarSimplify/X86/pr45360.ll | 6 +++--- llvm/test/Transforms/LoopDeletion/zero-btc.ll | 11 ++++++++--- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index bd5cdeabb9bd..1266c93316fa 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -151,14 +151,6 @@ breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (!BTC->isZero()) return LoopDeletionResult::Unmodified; - // For non-outermost loops, the tricky case is that we can drop blocks - // out of both inner and outer loops at the same time. This results in - // new exiting block for the outer loop appearing, and possibly needing - // an lcssa phi inserted. (See loop_nest_lcssa test case in zero-btc.ll) - // TODO: We can handle a bunch of cases here without much work, revisit. 
- if (!L->isOutermost()) - return LoopDeletionResult::Unmodified; - breakLoopBackedge(L, DT, SE, LI, MSSA); return LoopDeletionResult::Deleted; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index e6575ee2caf2..8d167923db00 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -761,13 +761,18 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } } +static Loop *getOutermostLoop(Loop *L) { + while (Loop *Parent = L->getParentLoop()) + L = Parent; + return L; +} + void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA) { - - assert(L->isOutermost() && "Can't yet preserve LCSSA for this case"); auto *Latch = L->getLoopLatch(); assert(Latch && "multiple latches not yet supported"); auto *Header = L->getHeader(); + Loop *OutermostLoop = getOutermostLoop(L); SE.forgetLoop(L); @@ -790,6 +795,14 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // Erase (and destroy) this loop instance. Handles relinking sub-loops // and blocks within the loop as needed. LI.erase(L); + + // If the loop we broke had a parent, then changeToUnreachable might have + // caused a block to be removed from the parent loop (see loop_nest_lcssa + // test case in zero-btc.ll for an example), thus changing the parent's + // exit blocks. If that happened, we need to rebuild LCSSA on the outermost + // loop which might have a had a block removed. 
+ if (OutermostLoop != L) + formLCSSARecursively(*OutermostLoop, DT, &LI, &SE); } diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll index d0857fa707b1..397c23cfd3ea 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll @@ -23,8 +23,8 @@ define dso_local i32 @main() { ; CHECK-NEXT: [[I6:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[I24:%.*]] = load i32, i32* @b, align 4 ; CHECK-NEXT: [[D_PROMOTED9:%.*]] = load i32, i32* @d, align 4 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: +; CHECK-NEXT: br label [[BB13_PREHEADER:%.*]] +; CHECK: bb13.preheader: ; CHECK-NEXT: [[I8_LCSSA10:%.*]] = phi i32 [ [[D_PROMOTED9]], [[BB:%.*]] ], [ [[I8:%.*]], [[BB19_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I8]] = and i32 [[I8_LCSSA10]], [[I6]] ; CHECK-NEXT: [[I21:%.*]] = icmp eq i32 [[I8]], 0 @@ -33,7 +33,7 @@ define dso_local i32 @main() { ; CHECK-NEXT: [[I26:%.*]] = urem i32 [[I24]], [[I8]] ; CHECK-NEXT: store i32 [[I26]], i32* @e, align 4 ; CHECK-NEXT: [[I30_NOT:%.*]] = icmp eq i32 [[I26]], 0 -; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB1]] +; CHECK-NEXT: br i1 [[I30_NOT]], label [[BB32_LOOPEXIT:%.*]], label [[BB13_PREHEADER]] ; CHECK: bb13.preheader.bb27.thread.split_crit_edge: ; CHECK-NEXT: store i32 -1, i32* @f, align 4 ; CHECK-NEXT: store i32 0, i32* @d, align 4 diff --git a/llvm/test/Transforms/LoopDeletion/zero-btc.ll b/llvm/test/Transforms/LoopDeletion/zero-btc.ll index e3a37865321b..413b0a877dd4 100644 --- a/llvm/test/Transforms/LoopDeletion/zero-btc.ll +++ b/llvm/test/Transforms/LoopDeletion/zero-btc.ll @@ -302,7 +302,9 @@ define void @test_live_outer() { ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: br i1 false, label [[INNER]], label [[LATCH]] +; CHECK-NEXT: br i1 false, label [[INNER_INNER_CRIT_EDGE:%.*]], label [[LATCH]] +; CHECK: 
inner.inner_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: latch: ; CHECK-NEXT: store i32 [[IV]], i32* @G, align 4 ; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1 @@ -346,11 +348,14 @@ define void @loop_nest_lcssa() { ; CHECK: inner_header: ; CHECK-NEXT: br i1 false, label [[INNER_LATCH:%.*]], label [[OUTER_LATCH:%.*]] ; CHECK: inner_latch: -; CHECK-NEXT: br i1 false, label [[INNER_HEADER]], label [[LOOPEXIT:%.*]] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[INNER_HEADER]] ] +; CHECK-NEXT: br i1 false, label [[INNER_LATCH_INNER_HEADER_CRIT_EDGE:%.*]], label [[LOOPEXIT:%.*]] +; CHECK: inner_latch.inner_header_crit_edge: +; CHECK-NEXT: unreachable ; CHECK: outer_latch: ; CHECK-NEXT: br label [[OUTER_HEADER]] ; CHECK: loopexit: -; CHECK-NEXT: [[DOTLCSSA32:%.*]] = phi i32 [ [[TMP0]], [[INNER_LATCH]] ] +; CHECK-NEXT: [[DOTLCSSA32:%.*]] = phi i32 [ [[DOTLCSSA]], [[INNER_LATCH]] ] ; CHECK-NEXT: unreachable ; entry: