forked from OSchip/llvm-project
[IndVars] ICmpInst should not prevent IV widening
If we have decided to widen the IV with zext, then unsigned comparisons should not prevent widening (and likewise for sext with signed comparisons). The result of the comparison does not change in the wider type in this case. Differential Revision: https://reviews.llvm.org/D92207 Reviewed By: nikic
This commit is contained in:
parent
1db60c1307
commit
0c9c6ddf17
|
@ -1541,10 +1541,14 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
|
||||||
bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
|
bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
|
||||||
auto AnotherOpExtKind = ExtKind;
|
auto AnotherOpExtKind = ExtKind;
|
||||||
|
|
||||||
// Check that all uses are either s/zext, or narrow def (in case of we are
|
// Check that all uses are either:
|
||||||
// widening the IV increment), or single-input LCSSA Phis.
|
// - narrow def (in case we are widening the IV increment);
|
||||||
|
// - single-input LCSSA Phis;
|
||||||
|
// - comparison of the chosen type;
|
||||||
|
// - extend of the chosen type (raison d'etre).
|
||||||
SmallVector<Instruction *, 4> ExtUsers;
|
SmallVector<Instruction *, 4> ExtUsers;
|
||||||
SmallVector<PHINode *, 4> LCSSAPhiUsers;
|
SmallVector<PHINode *, 4> LCSSAPhiUsers;
|
||||||
|
SmallVector<ICmpInst *, 4> ICmpUsers;
|
||||||
for (Use &U : NarrowUse->uses()) {
|
for (Use &U : NarrowUse->uses()) {
|
||||||
Instruction *User = cast<Instruction>(U.getUser());
|
Instruction *User = cast<Instruction>(U.getUser());
|
||||||
if (User == NarrowDef)
|
if (User == NarrowDef)
|
||||||
|
@ -1558,6 +1562,19 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
|
||||||
LCSSAPhiUsers.push_back(LCSSAPhi);
|
LCSSAPhiUsers.push_back(LCSSAPhi);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (auto *ICmp = dyn_cast<ICmpInst>(User)) {
|
||||||
|
auto Pred = ICmp->getPredicate();
|
||||||
|
// We have 3 types of predicates: signed, unsigned and equality
|
||||||
|
// predicates. For equality, it's legal to widen icmp for either sign or
|
||||||
|
// zero extend. For sign extend, we can also do so for signed predicates,
|
||||||
|
// likewise for zero extend we can widen icmp for unsigned predicates.
|
||||||
|
if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
|
||||||
|
return false;
|
||||||
|
if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
|
||||||
|
return false;
|
||||||
|
ICmpUsers.push_back(ICmp);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (ExtKind == SignExtended)
|
if (ExtKind == SignExtended)
|
||||||
User = dyn_cast<SExtInst>(User);
|
User = dyn_cast<SExtInst>(User);
|
||||||
else
|
else
|
||||||
|
@ -1655,6 +1672,26 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
|
||||||
User->replaceAllUsesWith(TruncPN);
|
User->replaceAllUsesWith(TruncPN);
|
||||||
DeadInsts.emplace_back(User);
|
DeadInsts.emplace_back(User);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (ICmpInst *User : ICmpUsers) {
|
||||||
|
Builder.SetInsertPoint(User);
|
||||||
|
auto ExtendedOp = [&](Value * V)->Value * {
|
||||||
|
if (V == NarrowUse)
|
||||||
|
return WideBO;
|
||||||
|
if (ExtKind == ZeroExtended)
|
||||||
|
return Builder.CreateZExt(V, WideBO->getType());
|
||||||
|
else
|
||||||
|
return Builder.CreateSExt(V, WideBO->getType());
|
||||||
|
};
|
||||||
|
auto Pred = User->getPredicate();
|
||||||
|
auto *LHS = ExtendedOp(User->getOperand(0));
|
||||||
|
auto *RHS = ExtendedOp(User->getOperand(1));
|
||||||
|
auto *WideCmp =
|
||||||
|
Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
|
||||||
|
User->replaceAllUsesWith(WideCmp);
|
||||||
|
DeadInsts.emplace_back(User);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -795,37 +795,36 @@ failure:
|
||||||
unreachable
|
unreachable
|
||||||
}
|
}
|
||||||
|
|
||||||
; TODO: We can widen here despite the icmp user of %foo in guarded block.
|
|
||||||
define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
|
define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
|
||||||
; CHECK-LABEL: @test16_unsigned_pos1(
|
; CHECK-LABEL: @test16_unsigned_pos1(
|
||||||
; CHECK-NEXT: entry:
|
; CHECK-NEXT: entry:
|
||||||
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
|
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], -1
|
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[X:%.*]] to i64
|
||||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||||
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
|
; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
|
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1
|
||||||
; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP2]], -1
|
|
||||||
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
|
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
|
||||||
; CHECK: guarded:
|
; CHECK: guarded:
|
||||||
; CHECK-NEXT: [[ICMP_USER3:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]]
|
; CHECK-NEXT: [[ICMP_USER_WIDE4:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
|
||||||
; CHECK-NEXT: br i1 [[ICMP_USER3]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]]
|
; CHECK-NEXT: br i1 [[ICMP_USER_WIDE4]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]]
|
||||||
; CHECK: backedge:
|
; CHECK: backedge:
|
||||||
; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64
|
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP3]]
|
||||||
; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
|
|
||||||
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
|
; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4
|
||||||
; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
|
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
|
||||||
; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4
|
|
||||||
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
|
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
|
||||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
|
||||||
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
|
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
|
||||||
; CHECK: exit:
|
; CHECK: exit:
|
||||||
; CHECK-NEXT: call void @use(i32 -1)
|
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 -1 to i32
|
||||||
; CHECK-NEXT: ret i32 -1
|
; CHECK-NEXT: call void @use(i32 [[TMP4]])
|
||||||
|
; CHECK-NEXT: ret i32 [[TMP4]]
|
||||||
; CHECK: failure:
|
; CHECK: failure:
|
||||||
; CHECK-NEXT: [[FOO_LCSSA2:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ]
|
; CHECK-NEXT: [[FOO_LCSSA2_WIDE:%.*]] = phi i64 [ [[TMP3]], [[BACKEDGE]] ]
|
||||||
; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA2]])
|
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[FOO_LCSSA2_WIDE]] to i32
|
||||||
|
; CHECK-NEXT: call void @use(i32 [[TMP5]])
|
||||||
; CHECK-NEXT: unreachable
|
; CHECK-NEXT: unreachable
|
||||||
; CHECK: side_exit:
|
; CHECK: side_exit:
|
||||||
; CHECK-NEXT: ret i32 0
|
; CHECK-NEXT: ret i32 0
|
||||||
|
|
Loading…
Reference in New Issue