[IndVars] LCSSA Phi users should not prevent widening

When widening an IndVar that has LCSSA Phi users outside
the loop, we can safely widen it as usual and then truncate
the result outside the loop without hurting performance.
Only single-input LCSSA Phis are handled, so that no critical
edges have to be split.

Differential Revision: https://reviews.llvm.org/D91593
Reviewed By: skatkov
This commit is contained in:
Max Kazantsev 2020-11-27 11:19:54 +07:00
parent abfcb606c2
commit faf183874c
2 changed files with 44 additions and 22 deletions

View File

@ -1542,16 +1542,26 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
auto AnotherOpExtKind = ExtKind;
// Check that all uses are either s/zext, or narrow def (in case of we are
// widening the IV increment).
// widening the IV increment), or single-input LCSSA Phis.
SmallVector<Instruction *, 4> ExtUsers;
SmallVector<PHINode *, 4> LCSSAPhiUsers;
for (Use &U : NarrowUse->uses()) {
if (U.getUser() == NarrowDef)
Instruction *User = cast<Instruction>(U.getUser());
if (User == NarrowDef)
continue;
Instruction *User = nullptr;
if (!L->contains(User)) {
auto *LCSSAPhi = cast<PHINode>(User);
// Make sure there is only 1 input, so that we don't have to split
// critical edges.
if (LCSSAPhi->getNumOperands() != 1)
return false;
LCSSAPhiUsers.push_back(LCSSAPhi);
continue;
}
if (ExtKind == SignExtended)
User = dyn_cast<SExtInst>(U.getUser());
User = dyn_cast<SExtInst>(User);
else
User = dyn_cast<ZExtInst>(U.getUser());
User = dyn_cast<ZExtInst>(User);
if (!User || User->getType() != WideType)
return false;
ExtUsers.push_back(User);
@ -1630,6 +1640,21 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
User->replaceAllUsesWith(WideBO);
DeadInsts.emplace_back(User);
}
for (PHINode *User : LCSSAPhiUsers) {
assert(User->getNumOperands() == 1 && "Checked before!");
Builder.SetInsertPoint(User);
auto *WidePN =
Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
"Not a LCSSA Phi?");
WidePN->addIncoming(WideBO, LoopExitingBlock);
Builder.SetInsertPoint(User->getParent()->getFirstNonPHI());
auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
User->replaceAllUsesWith(TruncPN);
DeadInsts.emplace_back(User);
}
return true;
}

View File

@ -697,20 +697,18 @@ define i32 @test14(i32 %start, i32* %p, i32* %q) {
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]]
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 -1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK: failure:
; CHECK-NEXT: unreachable
;
@ -750,24 +748,23 @@ define i32 @test15(i32 %start, i32* %p, i32* %q) {
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]]
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
; CHECK: exit:
; CHECK-NEXT: call void @use(i32 -1)
; CHECK-NEXT: ret i32 -1
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32
; CHECK-NEXT: call void @use(i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK: failure:
; CHECK-NEXT: [[FOO_LCSSA1:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ]
; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA1]])
; CHECK-NEXT: [[FOO_LCSSA1_WIDE:%.*]] = phi i64 [ [[TMP1]], [[BACKEDGE]] ]
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[FOO_LCSSA1_WIDE]] to i32
; CHECK-NEXT: call void @use(i32 [[TMP3]])
; CHECK-NEXT: unreachable
;
entry: