[IndVars] ICmpInst should not prevent IV widening

If we have decided to widen the IV with zext, then unsigned comparisons
should not prevent widening (and likewise for sext and signed comparisons).
The result of the comparison in the wider type does not change in this case.

Differential Revision: https://reviews.llvm.org/D92207
Reviewed By: nikic
This commit is contained in:
Max Kazantsev 2020-11-30 10:51:31 +07:00
parent 1db60c1307
commit 0c9c6ddf17
2 changed files with 52 additions and 16 deletions

View File

@ -1541,10 +1541,14 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
auto AnotherOpExtKind = ExtKind; auto AnotherOpExtKind = ExtKind;
// Check that all uses are either s/zext, or narrow def (in case of we are // Check that all uses are either:
// widening the IV increment), or single-input LCSSA Phis. // - narrow def (in case of we are widening the IV increment);
// - single-input LCSSA Phis;
// - comparison of the chosen type;
// - extend of the chosen type (raison d'etre).
SmallVector<Instruction *, 4> ExtUsers; SmallVector<Instruction *, 4> ExtUsers;
SmallVector<PHINode *, 4> LCSSAPhiUsers; SmallVector<PHINode *, 4> LCSSAPhiUsers;
SmallVector<ICmpInst *, 4> ICmpUsers;
for (Use &U : NarrowUse->uses()) { for (Use &U : NarrowUse->uses()) {
Instruction *User = cast<Instruction>(U.getUser()); Instruction *User = cast<Instruction>(U.getUser());
if (User == NarrowDef) if (User == NarrowDef)
@ -1558,6 +1562,19 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
LCSSAPhiUsers.push_back(LCSSAPhi); LCSSAPhiUsers.push_back(LCSSAPhi);
continue; continue;
} }
if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
auto Pred = ICmp->getPredicate();
// We have 3 types of predicates: signed, unsigned and equality
// predicates. For equality, it's legal to widen icmp for either sign or
// zero extend. For sign extend, we can also do so for signed predicates;
// likewise, for zero extend we can widen icmp for unsigned predicates.
if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
return false;
if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
return false;
ICmpUsers.push_back(ICmp);
continue;
}
if (ExtKind == SignExtended) if (ExtKind == SignExtended)
User = dyn_cast<SExtInst>(User); User = dyn_cast<SExtInst>(User);
else else
@ -1655,6 +1672,26 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
User->replaceAllUsesWith(TruncPN); User->replaceAllUsesWith(TruncPN);
DeadInsts.emplace_back(User); DeadInsts.emplace_back(User);
} }
for (ICmpInst *User : ICmpUsers) {
Builder.SetInsertPoint(User);
auto ExtendedOp = [&](Value * V)->Value * {
if (V == NarrowUse)
return WideBO;
if (ExtKind == ZeroExtended)
return Builder.CreateZExt(V, WideBO->getType());
else
return Builder.CreateSExt(V, WideBO->getType());
};
auto Pred = User->getPredicate();
auto *LHS = ExtendedOp(User->getOperand(0));
auto *RHS = ExtendedOp(User->getOperand(1));
auto *WideCmp =
Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
User->replaceAllUsesWith(WideCmp);
DeadInsts.emplace_back(User);
}
return true; return true;
} }

View File

@ -795,37 +795,36 @@ failure:
unreachable unreachable
} }
; TODO: We can widen here despite the icmp user of %foo in guarded block.
define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) { define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
; CHECK-LABEL: @test16_unsigned_pos1( ; CHECK-LABEL: @test16_unsigned_pos1(
; CHECK-NEXT: entry: ; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop: ; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP2]], -1
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
; CHECK: guarded: ; CHECK: guarded:
; CHECK-NEXT: [[ICMP_USER3:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]] ; CHECK-NEXT: [[ICMP_USER_WIDE4:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: br i1 [[ICMP_USER3]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] ; CHECK-NEXT: br i1 [[ICMP_USER_WIDE4]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]]
; CHECK: backedge: ; CHECK: backedge:
; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 ; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
; CHECK: exit: ; CHECK: exit:
; CHECK-NEXT: call void @use(i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 -1 to i32
; CHECK-NEXT: ret i32 -1 ; CHECK-NEXT: call void @use(i32 [[TMP4]])
; CHECK-NEXT: ret i32 [[TMP4]]
; CHECK: failure: ; CHECK: failure:
; CHECK-NEXT: [[FOO_LCSSA2:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ] ; CHECK-NEXT: [[FOO_LCSSA2_WIDE:%.*]] = phi i64 [ [[TMP3]], [[BACKEDGE]] ]
; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA2]]) ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[FOO_LCSSA2_WIDE]] to i32
; CHECK-NEXT: call void @use(i32 [[TMP5]])
; CHECK-NEXT: unreachable ; CHECK-NEXT: unreachable
; CHECK: side_exit: ; CHECK: side_exit:
; CHECK-NEXT: ret i32 0 ; CHECK-NEXT: ret i32 0