[InstCombine] reduce vector casting before icmp

There may be some generalizations (see test comments) of these patterns,
but this should handle the cases motivated by:
https://llvm.org/PR51315
https://llvm.org/PR51259

The backend may want to transform differently, but at least for
the x86 examples that I looked at, there does not appear to be
any significant performance difference either way.
This commit is contained in:
Sanjay Patel 2021-08-06 16:36:32 -04:00
parent 67d499445d
commit 0369714b31
2 changed files with 32 additions and 14 deletions

View File

@ -2936,6 +2936,19 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
}
// If this is checking if all elements of an extended vector are clear or not,
// compare in a narrow type to eliminate the extend:
// icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
Value *X;
if (Cmp.isEquality() && C->isNullValue() && Bitcast->hasOneUse() &&
match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
Value *NewCast = Builder.CreateBitCast(X, NewType);
return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
}
}
// Folding: icmp <pred> iN X, C
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC) to iN
// and C is a splat of a K-bit pattern

View File

@ -585,9 +585,8 @@ define i1 @eq_cast_eq-1_use2(<2 x i4> %x, <2 x i4> %y, i2* %p) {
define i1 @ne_cast_sext(<3 x i1> %b) {
; CHECK-LABEL: @ne_cast_sext(
; CHECK-NEXT: [[E:%.*]] = sext <3 x i1> [[B:%.*]] to <3 x i8>
; CHECK-NEXT: [[BC:%.*]] = bitcast <3 x i8> [[E]] to i24
; CHECK-NEXT: [[R:%.*]] = icmp ne i24 [[BC]], 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[B:%.*]] to i3
; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%e = sext <3 x i1> %b to <3 x i8>
@ -598,9 +597,8 @@ define i1 @ne_cast_sext(<3 x i1> %b) {
define i1 @eq_cast_sext(<8 x i3> %b) {
; CHECK-LABEL: @eq_cast_sext(
; CHECK-NEXT: [[E:%.*]] = sext <8 x i3> [[B:%.*]] to <8 x i8>
; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[E]] to i64
; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[BC]], 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i3> [[B:%.*]] to i24
; CHECK-NEXT: [[R:%.*]] = icmp eq i24 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%e = sext <8 x i3> %b to <8 x i8>
@ -611,9 +609,8 @@ define i1 @eq_cast_sext(<8 x i3> %b) {
define i1 @ne_cast_zext(<4 x i1> %b) {
; CHECK-LABEL: @ne_cast_zext(
; CHECK-NEXT: [[E:%.*]] = zext <4 x i1> [[B:%.*]] to <4 x i8>
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i8> [[E]] to i32
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[BC]], 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[B:%.*]] to i4
; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%e = zext <4 x i1> %b to <4 x i8>
@ -624,9 +621,8 @@ define i1 @ne_cast_zext(<4 x i1> %b) {
define i1 @eq_cast_zext(<5 x i3> %b) {
; CHECK-LABEL: @eq_cast_zext(
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
; CHECK-NEXT: [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
; CHECK-NEXT: [[R:%.*]] = icmp eq i35 [[BC]], 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i3> [[B:%.*]] to i15
; CHECK-NEXT: [[R:%.*]] = icmp eq i15 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%e = zext <5 x i3> %b to <5 x i7>
@ -635,6 +631,8 @@ define i1 @eq_cast_zext(<5 x i3> %b) {
ret i1 %r
}
; negative test - valid for eq/ne only
define i1 @sgt_cast_zext(<5 x i3> %b) {
; CHECK-LABEL: @sgt_cast_zext(
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
@ -648,6 +646,9 @@ define i1 @sgt_cast_zext(<5 x i3> %b) {
ret i1 %r
}
; negative test - not valid with non-zero constants
; TODO: We could handle some non-zero constants by checking for bit-loss after casts.
define i1 @eq7_cast_sext(<5 x i3> %b) {
; CHECK-LABEL: @eq7_cast_sext(
; CHECK-NEXT: [[E:%.*]] = sext <5 x i3> [[B:%.*]] to <5 x i7>
@ -661,12 +662,14 @@ define i1 @eq7_cast_sext(<5 x i3> %b) {
ret i1 %r
}
; extra use of extend is ok
define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
; CHECK-LABEL: @eq_cast_zext_use1(
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
; CHECK-NEXT: store <5 x i7> [[E]], <5 x i7>* [[P:%.*]], align 8
; CHECK-NEXT: [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
; CHECK-NEXT: [[R:%.*]] = icmp eq i35 [[BC]], 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i3> [[B]] to i15
; CHECK-NEXT: [[R:%.*]] = icmp eq i15 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%e = zext <5 x i3> %b to <5 x i7>
@ -676,6 +679,8 @@ define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
ret i1 %r
}
; negative test - don't create an extra cast
declare void @use35(i35)
define i1 @eq_cast_zext_use2(<5 x i3> %b) {