forked from OSchip/llvm-project
[IRCE] Canonicalize pre/post loops after the blocks are added into parent loop
Summary: We were canonicalizing the pre loop (into loop-simplify form) before the post loop blocks were added into the parent loop. This is incorrect when IRCE is done on a subloop. The post-loop blocks are created, but not yet added to the parent loop. So, loop-simplification on the pre-loop incorrectly updates LoopInfo. This patch corrects the ordering so that pre and post loop blocks are added to the parent loop (if any), and then the loops are canonicalized to LCSSA and LoopSimplifyForm. Reviewers: reames, sanjoy, apilipenko Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D33846 llvm-svn: 304800
This commit is contained in:
parent
3446ff4df5
commit
7218032019
|
@ -1371,28 +1371,35 @@ bool LoopConstrainer::run() {
|
|||
|
||||
DT.recalculate(F);
|
||||
|
||||
// We need to first add all the pre and post loop blocks into the loop
|
||||
// structures (as part of createClonedLoopStructure), and then update the
|
||||
// LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
|
||||
// LI when LoopSimplifyForm is generated.
|
||||
Loop *PreL = nullptr, *PostL = nullptr;
|
||||
if (!PreLoop.Blocks.empty()) {
|
||||
auto *L = createClonedLoopStructure(
|
||||
PreL = createClonedLoopStructure(
|
||||
&OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map);
|
||||
formLCSSARecursively(*L, DT, &LI, &SE);
|
||||
simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
|
||||
// Pre loops are slow paths, we do not need to perform any loop
|
||||
// optimizations on them.
|
||||
DisableAllLoopOptsOnLoop(*L);
|
||||
}
|
||||
|
||||
if (!PostLoop.Blocks.empty()) {
|
||||
auto *L = createClonedLoopStructure(
|
||||
PostL = createClonedLoopStructure(
|
||||
&OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map);
|
||||
formLCSSARecursively(*L, DT, &LI, &SE);
|
||||
simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
|
||||
// Post loops are slow paths, we do not need to perform any loop
|
||||
// optimizations on them.
|
||||
DisableAllLoopOptsOnLoop(*L);
|
||||
}
|
||||
|
||||
formLCSSARecursively(OriginalLoop, DT, &LI, &SE);
|
||||
simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true);
|
||||
// This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
|
||||
auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
|
||||
formLCSSARecursively(*L, DT, &LI, &SE);
|
||||
simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
|
||||
// Pre/post loops are slow paths, we do not need to perform any loop
|
||||
// optimizations on them.
|
||||
if (!IsOriginalLoop)
|
||||
DisableAllLoopOptsOnLoop(*L);
|
||||
};
|
||||
if (PreL)
|
||||
CanonicalizeLoop(PreL, false);
|
||||
if (PostL)
|
||||
CanonicalizeLoop(PostL, false);
|
||||
CanonicalizeLoop(&OriginalLoop, true);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,182 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -irce < %s -S | FileCheck %s
|
||||
|
||||
; REQUIRES: asserts
|
||||
|
||||
; IRCE creates the pre and post loops, and then canonicalizes
|
||||
; these loops to LCSSA and loop-simplify form. Make sure that the update to the loopinfo does not
|
||||
; incorrectly change the header while canonicalizing these pre/post loops. We
|
||||
; were incorrectly updating LI when the split loop is a subloop as in the case below.
|
||||
source_filename = "correct-loop-info.ll"
|
||||
|
||||
define void @baz() personality i32* ()* @ham {
|
||||
; CHECK-LABEL: @baz(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: br label [[OUTERHEADER:%.*]]
|
||||
; CHECK: outerheader:
|
||||
; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 undef, 84
|
||||
; CHECK-NEXT: br i1 [[TMP]], label [[BB2:%.*]], label [[BB16:%.*]]
|
||||
; CHECK: bb2:
|
||||
; CHECK-NEXT: br i1 false, label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
|
||||
; CHECK: innerheader.preloop.preheader:
|
||||
; CHECK-NEXT: br label [[INNERHEADER_PRELOOP:%.*]]
|
||||
; CHECK: mainloop:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], -1
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
|
||||
; CHECK: innerheader.preheader:
|
||||
; CHECK-NEXT: br label [[INNERHEADER:%.*]]
|
||||
; CHECK: innerheader:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP6:%.*]], [[BB8:%.*]] ], [ [[TMP4_PRELOOP_COPY:%.*]], [[INNERHEADER_PREHEADER]] ]
|
||||
; CHECK-NEXT: invoke void @pluto()
|
||||
; CHECK-NEXT: to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp
|
||||
; CHECK: bb5:
|
||||
; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 0
|
||||
; CHECK-NEXT: br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]]
|
||||
; CHECK: bb8:
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP6]], -1
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]]
|
||||
; CHECK: main.exit.selector:
|
||||
; CHECK-NEXT: [[TMP6_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[BB8]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]]
|
||||
; CHECK: main.pseudo.exit:
|
||||
; CHECK-NEXT: [[TMP4_COPY:%.*]] = phi i32 [ [[TMP4_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
|
||||
; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
|
||||
; CHECK-NEXT: br label [[POSTLOOP:%.*]]
|
||||
; CHECK: outer_exiting.loopexit:
|
||||
; CHECK-NEXT: [[LPAD_LOOPEXIT:%.*]] = landingpad { i8*, i32 }
|
||||
; CHECK-NEXT: cleanup
|
||||
; CHECK-NEXT: br label [[OUTER_EXITING:%.*]]
|
||||
; CHECK: outer_exiting.loopexit.split-lp.loopexit:
|
||||
; CHECK-NEXT: [[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 }
|
||||
; CHECK-NEXT: cleanup
|
||||
; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp
|
||||
; CHECK: outer_exiting.loopexit.split-lp.loopexit.split-lp:
|
||||
; CHECK-NEXT: %lpad.loopexit.split-lp3 = landingpad { i8*, i32 }
|
||||
; CHECK-NEXT: cleanup
|
||||
; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp
|
||||
; CHECK: outer_exiting.loopexit.split-lp:
|
||||
; CHECK-NEXT: br label [[OUTER_EXITING]]
|
||||
; CHECK: outer_exiting:
|
||||
; CHECK-NEXT: switch i32 undef, label [[EXIT2:%.*]] [
|
||||
; CHECK-NEXT: i32 142, label [[BB14:%.*]]
|
||||
; CHECK-NEXT: i32 448, label [[EXIT:%.*]]
|
||||
; CHECK-NEXT: ]
|
||||
; CHECK: exit3.loopexit:
|
||||
; CHECK-NEXT: br label [[EXIT3:%.*]]
|
||||
; CHECK: exit3.loopexit4:
|
||||
; CHECK-NEXT: br label [[EXIT3]]
|
||||
; CHECK: exit3.loopexit5:
|
||||
; CHECK-NEXT: br label [[EXIT3]]
|
||||
; CHECK: exit3:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: bb13.loopexit:
|
||||
; CHECK-NEXT: br label [[BB13]]
|
||||
; CHECK: bb13:
|
||||
; CHECK-NEXT: unreachable
|
||||
; CHECK: bb14:
|
||||
; CHECK-NEXT: br label [[OUTERHEADER]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: bb16:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: exit2:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: innerheader.preloop:
|
||||
; CHECK-NEXT: [[TMP4_PRELOOP:%.*]] = phi i32 [ [[TMP6_PRELOOP:%.*]], [[BB8_PRELOOP:%.*]] ], [ undef, [[INNERHEADER_PRELOOP_PREHEADER]] ]
|
||||
; CHECK-NEXT: invoke void @pluto()
|
||||
; CHECK-NEXT: to label [[BB5_PRELOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT:%.*]]
|
||||
; CHECK: bb5.preloop:
|
||||
; CHECK-NEXT: [[TMP6_PRELOOP]] = add i32 [[TMP4_PRELOOP]], 1
|
||||
; CHECK-NEXT: [[TMP7_PRELOOP:%.*]] = icmp ult i32 [[TMP6_PRELOOP]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP7_PRELOOP]], label [[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]]
|
||||
; CHECK: bb8.preloop:
|
||||
; CHECK-NEXT: [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], -1
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop !0, !irce.loop.clone !5
|
||||
; CHECK: preloop.exit.selector:
|
||||
; CHECK-NEXT: [[TMP6_PRELOOP_LCSSA:%.*]] = phi i32 [ [[TMP6_PRELOOP]], [[BB8_PRELOOP]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]]
|
||||
; CHECK: preloop.pseudo.exit:
|
||||
; CHECK-NEXT: [[TMP4_PRELOOP_COPY]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
|
||||
; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
|
||||
; CHECK-NEXT: br label [[MAINLOOP]]
|
||||
; CHECK: postloop:
|
||||
; CHECK-NEXT: br label [[INNERHEADER_POSTLOOP:%.*]]
|
||||
; CHECK: innerheader.postloop:
|
||||
; CHECK-NEXT: [[TMP4_POSTLOOP:%.*]] = phi i32 [ [[TMP6_POSTLOOP:%.*]], [[BB8_POSTLOOP:%.*]] ], [ [[TMP4_COPY]], [[POSTLOOP]] ]
|
||||
; CHECK-NEXT: invoke void @pluto()
|
||||
; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit
|
||||
; CHECK: bb5.postloop:
|
||||
; CHECK-NEXT: [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1
|
||||
; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp ult i32 [[TMP6_POSTLOOP]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]]
|
||||
; CHECK: bb8.postloop:
|
||||
; CHECK-NEXT: [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84
|
||||
; CHECK-NEXT: br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], !llvm.loop !6, !irce.loop.clone !5
|
||||
;
|
||||
bb:
|
||||
br label %outerheader
|
||||
|
||||
outerheader: ; preds = %bb14, %bb
|
||||
%tmp = icmp slt i32 undef, 84
|
||||
br i1 %tmp, label %bb2, label %bb16
|
||||
|
||||
bb2: ; preds = %outerheader
|
||||
br label %innerheader
|
||||
|
||||
innerheader: ; preds = %bb8, %bb2
|
||||
%tmp4 = phi i32 [ %tmp6, %bb8 ], [ undef, %bb2 ]
|
||||
invoke void @pluto()
|
||||
to label %bb5 unwind label %outer_exiting
|
||||
|
||||
bb5: ; preds = %innerheader
|
||||
%tmp6 = add i32 %tmp4, 1
|
||||
%tmp7 = icmp ult i32 %tmp6, 0
|
||||
br i1 %tmp7, label %bb8, label %exit3
|
||||
|
||||
bb8: ; preds = %bb5
|
||||
%tmp9 = icmp slt i32 %tmp6, 84
|
||||
br i1 %tmp9, label %innerheader, label %bb13
|
||||
|
||||
outer_exiting: ; preds = %innerheader
|
||||
%tmp11 = landingpad { i8*, i32 }
|
||||
cleanup
|
||||
switch i32 undef, label %exit2 [
|
||||
i32 142, label %bb14
|
||||
i32 448, label %exit
|
||||
]
|
||||
|
||||
exit3: ; preds = %bb5
|
||||
ret void
|
||||
|
||||
bb13: ; preds = %bb8
|
||||
unreachable
|
||||
|
||||
bb14: ; preds = %outer_exiting
|
||||
br label %outerheader
|
||||
|
||||
exit: ; preds = %outer_exiting
|
||||
ret void
|
||||
|
||||
bb16: ; preds = %outerheader
|
||||
ret void
|
||||
|
||||
exit2: ; preds = %outer_exiting
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32* @ham()
|
||||
|
||||
declare void @pluto()
|
||||
|
||||
!0 = distinct !{!0, !1, !2, !3, !4}
|
||||
!1 = !{!"llvm.loop.unroll.disable"}
|
||||
!2 = !{!"llvm.loop.vectorize.enable", i1 false}
|
||||
!3 = !{!"llvm.loop.licm_versioning.disable"}
|
||||
!4 = !{!"llvm.loop.distribute.enable", i1 false}
|
||||
!5 = !{}
|
||||
!6 = distinct !{!6, !1, !2, !3, !4}
|
Loading…
Reference in New Issue