forked from OSchip/llvm-project
[LoopInterchange] Move instructions from preheader to outer loop header.
Instructions defined in the original inner loop preheader may depend on values defined in the outer loop header, but the inner loop header will become the entry block in the loop nest. Move the instructions from the preheader to the outer loop header, so we do not break dominance. We also have to check for unsafe instructions in the preheader. If there are no unsafe instructions, all instructions should be movable. Currently we move all instructions except the terminator and rely on LICM to hoist out invariant instructions later. Fixes PR45743.
This commit is contained in:
parent
54cb552b96
commit
8393b9fd1f
|
@ -625,6 +625,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
|
|||
containsUnsafeInstructions(OuterLoopLatch))
|
||||
return false;
|
||||
|
||||
// Also make sure the inner loop preheader does not contain any unsafe
|
||||
// instructions. Note that all instructions in the preheader will be moved to
|
||||
// the outer loop header when interchanging.
|
||||
if (InnerLoopPreHeader != OuterLoopHeader &&
|
||||
containsUnsafeInstructions(InnerLoopPreHeader))
|
||||
return false;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
|
||||
// We have a perfect loop nest.
|
||||
return true;
|
||||
|
@ -1306,6 +1313,21 @@ bool LoopInterchangeTransform::transform() {
|
|||
LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
|
||||
}
|
||||
|
||||
// Instructions in the original inner loop preheader may depend on values
|
||||
// defined in the outer loop header. Move them there, because the original
|
||||
// inner loop preheader will become the entry into the interchanged loop nest.
|
||||
// Currently we move all instructions and rely on LICM to move invariant
|
||||
// instructions outside the loop nest.
|
||||
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
|
||||
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
|
||||
if (InnerLoopPreHeader != OuterLoopHeader) {
|
||||
SmallPtrSet<Instruction *, 4> NeedsMoving;
|
||||
for (Instruction &I :
|
||||
make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
|
||||
std::prev(InnerLoopPreHeader->end()))))
|
||||
I.moveBefore(OuterLoopHeader->getTerminator());
|
||||
}
|
||||
|
||||
Transformed |= adjustLoopLinks();
|
||||
if (!Transformed) {
|
||||
LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
|
||||
|
|
|
@ -20,11 +20,11 @@ define void @lcssa_08(i32 %n, i32 %m) {
|
|||
; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP24]], label [[INNER_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
|
||||
; CHECK: outer.preheader:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M:%.*]] to i64
|
||||
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
|
||||
; CHECK: outer.header:
|
||||
; CHECK-NEXT: [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
|
||||
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M]], 0
|
||||
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
|
||||
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
|
||||
; CHECK: inner.preheader:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
|
||||
|
@ -41,8 +41,9 @@ define void @lcssa_08(i32 %n, i32 %m) {
|
|||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: br label [[INNER_CRIT_EDGE:%.*]]
|
||||
; CHECK: inner.for.body.split:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
|
||||
; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
|
||||
; CHECK: inner.crit_edge:
|
||||
; CHECK-NEXT: br label [[OUTER_LATCH]]
|
||||
|
|
|
@ -0,0 +1,141 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -loop-interchange -S %s | FileCheck %s
|
||||
|
||||
@global = external local_unnamed_addr global [2 x [10 x i32]], align 16
|
||||
|
||||
; We need to move %tmp4 from the inner loop preheader to the outer loop header
|
||||
; before interchanging.
|
||||
define void @test1() local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: br label [[INNER_PH:%.*]]
|
||||
; CHECK: outer.header.preheader:
|
||||
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
|
||||
; CHECK: outer.header:
|
||||
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
|
||||
; CHECK-NEXT: [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED:%.*]], [[OUTER_HEADER_PREHEADER]] ], [ [[RED_NEXT:%.*]], [[OUTER_LATCH]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
|
||||
; CHECK-NEXT: br label [[INNER_SPLIT1:%.*]]
|
||||
; CHECK: inner.ph:
|
||||
; CHECK-NEXT: br label [[INNER:%.*]]
|
||||
; CHECK: inner:
|
||||
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[TMP0:%.*]], [[INNER_SPLIT:%.*]] ]
|
||||
; CHECK-NEXT: [[OUTER_RED]] = phi i32 [ [[RED_NEXT_LCSSA:%.*]], [[INNER_SPLIT]] ], [ 0, [[INNER_PH]] ]
|
||||
; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]]
|
||||
; CHECK: inner.split1:
|
||||
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[INNER_RED]], 20
|
||||
; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
|
||||
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
|
||||
; CHECK-NEXT: br label [[OUTER_LATCH]]
|
||||
; CHECK: inner.split:
|
||||
; CHECK-NEXT: [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[OUTER_LATCH]] ]
|
||||
; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 400
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[INNER]]
|
||||
; CHECK: outer.latch:
|
||||
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
|
||||
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
|
||||
; CHECK-NEXT: br i1 [[EC_2]], label [[INNER_SPLIT]], label [[OUTER_HEADER]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
bb:
|
||||
br label %outer.header
|
||||
|
||||
outer.header: ; preds = %outer.latch, %bb
|
||||
%outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
|
||||
%outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
|
||||
br label %inner.ph
|
||||
|
||||
inner.ph: ; preds = %outer.header
|
||||
; %tmp4 uses %outer.iv, which is defined by a phi in outer.header.
%tmp4 = add nsw i64 %outer.iv, 9
|
||||
br label %inner
|
||||
|
||||
inner: ; preds = %inner, %inner.ph
|
||||
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
|
||||
%inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
|
||||
%ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
|
||||
store i32 0, i32* %ptr
|
||||
%red.next = or i32 %inner.red, 20
|
||||
%inner.iv.next = add nsw i64 %inner.iv, 1
|
||||
%ec.1 = icmp eq i64 %inner.iv.next, 400
|
||||
br i1 %ec.1, label %outer.latch, label %inner
|
||||
|
||||
outer.latch: ; preds = %inner
|
||||
%red.next.lcssa = phi i32 [ %red.next, %inner ]
|
||||
%outer.iv.next = add nsw i64 %outer.iv, 1
|
||||
%ec.2 = icmp eq i64 %outer.iv.next, 400
|
||||
br i1 %ec.2, label %exit, label %outer.header
|
||||
|
||||
exit: ; preds = %outer.latch
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @side_effect()
|
||||
|
||||
; Cannot interchange, as the inner loop preheader contains a call to a function
|
||||
; with side effects.
|
||||
|
||||
define void @test2() {
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
|
||||
; CHECK: outer.header:
|
||||
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
|
||||
; CHECK-NEXT: [[OUTER_RED:%.*]] = phi i32 [ 0, [[BB]] ], [ [[RED_NEXT_LCSSA:%.*]], [[OUTER_LATCH]] ]
|
||||
; CHECK-NEXT: br label [[INNER_PH:%.*]]
|
||||
; CHECK: inner.ph:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
|
||||
; CHECK-NEXT: call void @side_effect()
|
||||
; CHECK-NEXT: br label [[INNER:%.*]]
|
||||
; CHECK: inner:
|
||||
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
|
||||
; CHECK-NEXT: [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED]], [[INNER_PH]] ], [ [[RED_NEXT:%.*]], [[INNER]] ]
|
||||
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[RED_NEXT]] = or i32 [[INNER_RED]], 20
|
||||
; CHECK-NEXT: [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
|
||||
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
|
||||
; CHECK-NEXT: br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
|
||||
; CHECK: outer.latch:
|
||||
; CHECK-NEXT: [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[INNER]] ]
|
||||
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
|
||||
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
|
||||
; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
bb:
|
||||
br label %outer.header
|
||||
|
||||
outer.header: ; preds = %outer.latch, %bb
|
||||
%outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
|
||||
%outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
|
||||
br label %inner.ph
|
||||
|
||||
inner.ph: ; preds = %outer.header
|
||||
%tmp4 = add nsw i64 %outer.iv, 9
|
||||
; This side-effecting call in the inner loop preheader is what the test
; expects to prevent the interchange.
call void @side_effect()
|
||||
br label %inner
|
||||
|
||||
inner: ; preds = %inner, %inner.ph
|
||||
%inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
|
||||
%inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
|
||||
%ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
|
||||
store i32 0, i32* %ptr
|
||||
%red.next = or i32 %inner.red, 20
|
||||
%inner.iv.next = add nsw i64 %inner.iv, 1
|
||||
%ec.1 = icmp eq i64 %inner.iv.next, 400
|
||||
br i1 %ec.1, label %outer.latch, label %inner
|
||||
|
||||
outer.latch: ; preds = %inner
|
||||
%red.next.lcssa = phi i32 [ %red.next, %inner ]
|
||||
%outer.iv.next = add nsw i64 %outer.iv, 1
|
||||
%ec.2 = icmp eq i64 %outer.iv.next, 400
|
||||
br i1 %ec.2, label %exit, label %outer.header
|
||||
|
||||
exit: ; preds = %outer.latch
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue