[LV] Use exiting block instead of latch in addUsersInExitBlock.

The latch may not be the exiting block. Use the exiting block instead
when looking up the incoming value of the LCSSA phi node. This fixes a
crash with early-exit loops.
This commit is contained in:
Florian Hahn 2022-05-22 18:27:41 +01:00
parent c230ab6db8
commit 145fe57106
No known key found for this signature in database
GPG Key ID: CF59919C6547A668
2 changed files with 93 additions and 18 deletions

View File

@ -8703,14 +8703,15 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB,
VPBasicBlock *MiddleVPBB, Loop *OrigLoop,
VPlan &Plan) {
BasicBlock *ExitBB = OrigLoop->getUniqueExitBlock();
BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
// Only handle single-exit loops with unique exit blocks for now.
if (!ExitBB || !ExitBB->getSinglePredecessor())
if (!ExitBB || !ExitBB->getSinglePredecessor() || !ExitingBB)
return;
// Introduce VPUsers modeling the exit values.
for (PHINode &ExitPhi : ExitBB->phis()) {
Value *IncomingValue =
ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch());
ExitPhi.getIncomingValueForBlock(ExitingBB);
VPValue *V = Plan.getOrAddVPValue(IncomingValue, true);
Plan.addLiveOut(&ExitPhi, V);
}

View File

@ -190,6 +190,80 @@ if.end:
ret void
}
define i32 @early_exit_with_live_out(i32* %ptr) {
; CHECK-LABEL: @early_exit_with_live_out(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> <i32 10, i32 10>, <2 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: store i32 10, i32* [[GEP]], align 4
; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
; CHECK-NEXT: ret i32 [[RES_LCSSA]]
;
; TAILFOLD-LABEL: @early_exit_with_live_out(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]]
; TAILFOLD: loop.header:
; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT: [[L:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; TAILFOLD-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD: loop.latch:
; TAILFOLD-NEXT: store i32 10, i32* [[GEP]], align 4
; TAILFOLD-NEXT: br label [[LOOP_HEADER]]
; TAILFOLD: exit:
; TAILFOLD-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
; TAILFOLD-NEXT: ret i32 [[RES_LCSSA]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep = getelementptr i32, i32* %ptr, i64 %iv
%l = load i32, i32* %gep
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, 1000
br i1 %ec, label %exit, label %loop.latch
loop.latch:
store i32 10, i32* %gep
br label %loop.header
exit:
%res.lcssa = phi i32 [ %l, %loop.header ]
ret i32 %res.lcssa
}
; Same as early_exit, but with optsize to prevent the use of
; a scalar epilogue. -- Can't vectorize this in either case.
define void @optsize(i16* %p, i32 %n) optsize {
@ -270,7 +344,7 @@ define void @multiple_unique_exit(i16* %p, i32 %n) {
; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -286,7 +360,7 @@ define void @multiple_unique_exit(i16* %p, i32 %n) {
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
@ -352,7 +426,7 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -368,7 +442,7 @@ define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT: ret i32 [[I_LCSSA]]
@ -436,7 +510,7 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -452,7 +526,7 @@ define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[EXIT]]
@ -521,7 +595,7 @@ define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -537,7 +611,7 @@ define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: ret i32 0
; CHECK: if.end2:
@ -610,7 +684,7 @@ define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -626,7 +700,7 @@ define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT: ret i32 [[I_LCSSA]]
@ -706,7 +780,7 @@ define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -722,7 +796,7 @@ define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT: ret i32 [[I_LCSSA]]
@ -1049,7 +1123,7 @@ define void @scalar_predication(float* %addr) {
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@ -1069,7 +1143,7 @@ define void @scalar_predication(float* %addr) {
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@ -1137,7 +1211,7 @@ define i32 @me_reduction(i32* %addr) {
; CHECK-NEXT: [[TMP4]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP4]])
; CHECK-NEXT: br label [[SCALAR_PH]]
@ -1156,7 +1230,7 @@ define i32 @me_reduction(i32* %addr) {
; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP7]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT: ret i32 [[LCSSA]]