[LoopInterchange] Fix transformation bugs in loop interchange

After loop interchange, the (old) outer loop header should not jump to
`LoopExit`. Note that the old outer loop becomes the new inner loop
after interchange. If we branched to `LoopExit` then after interchange
we would jump directly from the (new) inner loop header to `LoopExit`
without executing the rest of (new) outer loop.

This patch modifies adjustLoopBranches() such that the old outer
loop header (which becomes the new inner loop header) jumps to the
old inner loop latch which becomes the new outer loop latch after
interchange.

Reviewed By: bmahjour

Differential Revision: https://reviews.llvm.org/D98475
This commit is contained in:
CongzheUalberta 2021-04-07 20:44:32 -04:00
parent 5f6b3d1833
commit f5645ea65f
3 changed files with 212 additions and 7 deletions

View File

@ -1551,9 +1551,12 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
// The outer loop header might or might not branch to the outer latch.
// We are guaranteed to branch to the inner loop preheader.
if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
// In this case the outerLoopHeader should branch to the InnerLoopLatch.
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
DTUpdates,
/*MustUpdateOnce=*/false);
}
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
InnerLoopHeaderSuccessor, DTUpdates,
/*MustUpdateOnce=*/false);

View File

@ -25,12 +25,12 @@ define void @lcssa_08(i32 %n, i32 %m) {
; CHECK-NEXT: [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[INNER_FOR_BODY_SPLIT:%.*]]
; CHECK: inner.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: br label [[INNER_FOR_BODY:%.*]]
; CHECK: inner.for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT]] ]
; CHECK-NEXT: br label [[OUTER_PREHEADER]]
; CHECK: inner.for.body.split1:
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* [[TEMP]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV27]]
@ -41,10 +41,10 @@ define void @lcssa_08(i32 %n, i32 %m) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br label [[INNER_CRIT_EDGE:%.*]]
; CHECK: inner.for.body.split:
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ], [ [[WIDE_TRIP_COUNT]], [[OUTER_HEADER]] ]
; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA]]
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE:%.*]]
; CHECK: inner.crit_edge:
; CHECK-NEXT: br label [[OUTER_LATCH]]
; CHECK: outer.latch:

View File

@ -0,0 +1,202 @@
; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s
@b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2
@a = common global i32 0, align 4
@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4
; Doubly nested loop
;; C test case:
;; int a;
;; short b[3][5][8] = {{}, {{}, 0, 0, 0, 6, 1, 6}};
;; void test1() {
;; int c = 0, d;
;; for (; c <= 2; c++) {
;; if (c)
;; continue;
;; d = 0;
;; for (; d <= 2; d++)
;; a |= b[d][d][c + 5];
;; }
;; }
define void @test1() {
;CHECK-LABEL: @test1(
;CHECK: entry:
;CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
;CHECK: for.body.preheader:
;CHECK-NEXT: br label [[FOR_BODY:%.*]]
;CHECK: for.body:
;CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ [[INDVARS_IV_NEXT23:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
;CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[INDVARS_IV22:%.*]], 0
;CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY3_SPLIT1:%.*]], label [[FOR_BODY3_SPLIT:%.*]]
;CHECK: for.cond1.preheader:
;CHECK-NEXT: br label [[FOR_BODY3:%.*]]
;CHECK: for.body3:
;CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ %3, [[FOR_BODY3_SPLIT]] ]
;CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
;CHECK: for.body3.split1:
;CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 5
;CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV]], i64 [[TMP0]]
;CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX7]]
;CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
;CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a
;CHECK-NEXT: [[TMP_OR:%.*]] = or i32 [[TMP2]], [[CONV]]
;CHECK-NEXT: store i32 [[TMP_OR]], i32* @a
;CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
;CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 3
;CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT:%.*]]
;CHECK: for.body3.split:
;CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
;CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 3
;CHECK-NEXT: br i1 %4, label [[FOR_BODY3]], label [[FOR_END10:%.*]]
;CHECK: for.inc8.loopexit:
;CHECK-NEXT: br label [[FOR_INC8]]
;CHECK: for.inc8:
;CHECK-NEXT: [[INDVARS_IV_NEXT23]] = add nuw nsw i64 [[INDVARS_IV22]], 1
;CHECK-NEXT: [[EXITCOND25:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT23]], 3
;CHECK-NEXT: br i1 [[EXITCOND25]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]]
;CHECK: for.end10:
;CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @a
;CHECK-NEXT: ret void
entry:
br label %for.body
for.body: ; preds = %entry, %for.inc8
%indvars.iv22 = phi i64 [ 0, %entry ], [ %indvars.iv.next23, %for.inc8 ]
%tobool = icmp eq i64 %indvars.iv22, 0
br i1 %tobool, label %for.cond1.preheader, label %for.inc8
for.cond1.preheader: ; preds = %for.body
br label %for.body3
for.body3: ; preds = %for.cond1.preheader, %for.body3
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
%0 = add nuw nsw i64 %indvars.iv22, 5
%arrayidx7 = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 %indvars.iv, i64 %indvars.iv, i64 %0
%1 = load i16, i16* %arrayidx7
%conv = sext i16 %1 to i32
%2 = load i32, i32* @a
%or = or i32 %2, %conv
store i32 %or, i32* @a
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 3
br i1 %exitcond, label %for.body3, label %for.inc8.loopexit
for.inc8.loopexit: ; preds = %for.body3
br label %for.inc8
for.inc8: ; preds = %for.inc8.loopexit, %for.body
%indvars.iv.next23 = add nuw nsw i64 %indvars.iv22, 1
%exitcond25 = icmp ne i64 %indvars.iv.next23, 3
br i1 %exitcond25, label %for.body, label %for.end10
for.end10: ; preds = %for.inc8
%3 = load i32, i32* @a
ret void
}
; Triply nested loop
; The innermost and the middle loop are interchanged.
; C test case:
;; a;
;; d[][6];
;; void test2() {
;; int g = 10;
;; for (; g; g = g - 5) {
;; short c = 4;
;; for (; c; c--) {
;; int i = 4;
;; for (; i; i--) {
;; if (a)
;; break;
;; d[i][c] = 0;
;; }
;; }
;; }
;; }
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTERMOST_HEADER:%.*]]
; CHECK: outermost.header:
; CHECK-NEXT: [[INDVAR_OUTERMOST:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[INDVAR_OUTERMOST_NEXT:%.*]], [[OUTERMOST_LATCH:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
; CHECK-NEXT: [[TOBOOL71_I:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br label [[INNERMOST_PREHEADER:%.*]]
; CHECK: middle.header.preheader:
; CHECK-NEXT: br label [[MIDDLE_HEADER:%.*]]
; CHECK: middle.header:
; CHECK-NEXT: [[INDVAR_MIDDLE:%.*]] = phi i64 [ [[INDVAR_MIDDLE_NEXT:%.*]], [[MIDDLE_LATCH:%.*]] ], [ 4, [[MIDDLE_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: br i1 [[TOBOOL71_I]], label [[INNERMOST_BODY_SPLIT1:%.*]], label [[INNERMOST_BODY_SPLIT:%.*]]
; CHECK: innermost.preheader:
; CHECK-NEXT: br label [[INNERMOST_BODY:%.*]]
; CHECK: innermost.body:
; CHECK-NEXT: [[INDVAR_INNERMOST:%.*]] = phi i64 [ [[TMP1:%.*]], [[INNERMOST_BODY_SPLIT]] ], [ 4, [[INNERMOST_PREHEADER]] ]
; CHECK-NEXT: br label [[MIDDLE_HEADER_PREHEADER]]
; CHECK: innermost.body.split1:
; CHECK-NEXT: [[ARRAYIDX9_I:%.*]] = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 [[INDVAR_INNERMOST]], i64 [[INDVAR_MIDDLE]]
; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX9_I]], align 4
; CHECK-NEXT: [[INDVAR_INNERMOST_NEXT:%.*]] = add nsw i64 [[INDVAR_INNERMOST]], -1
; CHECK-NEXT: [[TOBOOL5_I:%.*]] = icmp eq i64 [[INDVAR_INNERMOST_NEXT]], 0
; CHECK-NEXT: br label [[MIDDLE_LATCH_LOOPEXIT:%.*]]
; CHECK: innermost.body.split:
; CHECK-NEXT: [[TMP1]] = add nsw i64 [[INDVAR_INNERMOST]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OUTERMOST_LATCH]], label [[INNERMOST_BODY]]
; CHECK: innermost.loopexit:
; CHECK-NEXT: br label [[MIDDLE_LATCH]]
; CHECK: middle.latch:
; CHECK-NEXT: [[INDVAR_MIDDLE_NEXT]] = add nsw i64 [[INDVAR_MIDDLE]], -1
; CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp eq i64 [[INDVAR_MIDDLE_NEXT]], 0
; CHECK-NEXT: br i1 [[TOBOOL2_I]], label [[INNERMOST_BODY_SPLIT]], label [[MIDDLE_HEADER]]
; CHECK: outermost.latch:
; CHECK-NEXT: [[INDVAR_OUTERMOST_NEXT]] = add nsw i32 [[INDVAR_OUTERMOST]], -5
; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i32 [[INDVAR_OUTERMOST_NEXT]], 0
; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[OUTERMOST_EXIT:%.*]], label [[OUTERMOST_HEADER]]
; CHECK: outermost.exit:
; CHECK-NEXT: ret void
;
entry:
br label %outermost.header
outermost.header: ; preds = %outermost.latch, %entry
%indvar.outermost = phi i32 [ 10, %entry ], [ %indvar.outermost.next, %outermost.latch ]
%0 = load i32, i32* @a, align 4
%tobool71.i = icmp eq i32 %0, 0
br label %middle.header
middle.header: ; preds = %middle.latch, %outermost.header
%indvar.middle = phi i64 [ 4, %outermost.header ], [ %indvar.middle.next, %middle.latch ]
br i1 %tobool71.i, label %innermost.preheader, label %middle.latch
innermost.preheader: ; preds = %middle.header
br label %innermost.body
innermost.body: ; preds = %innermost.preheader, %innermost.body
%indvar.innermost = phi i64 [ %indvar.innermost.next, %innermost.body ], [ 4, %innermost.preheader ]
%arrayidx9.i = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 %indvar.innermost, i64 %indvar.middle
store i32 0, i32* %arrayidx9.i, align 4
%indvar.innermost.next = add nsw i64 %indvar.innermost, -1
%tobool5.i = icmp eq i64 %indvar.innermost.next, 0
br i1 %tobool5.i, label %innermost.loopexit, label %innermost.body
innermost.loopexit: ; preds = %innermost.body
br label %middle.latch
middle.latch: ; preds = %middle.latch.loopexit, %middle.header
%indvar.middle.next = add nsw i64 %indvar.middle, -1
%tobool2.i = icmp eq i64 %indvar.middle.next, 0
br i1 %tobool2.i, label %outermost.latch, label %middle.header
outermost.latch: ; preds = %middle.latch
%indvar.outermost.next = add nsw i32 %indvar.outermost, -5
%tobool.i = icmp eq i32 %indvar.outermost.next, 0
br i1 %tobool.i, label %outermost.exit, label %outermost.header
outermost.exit: ; preds = %outermost.latch
ret void
}