forked from OSchip/llvm-project
[InstCombine] reduce vector casting before icmp
There may be some generalizations (see test comments) of these patterns, but this should handle the cases motivated by https://llvm.org/PR51315 and https://llvm.org/PR51259. The backend may want to transform differently, but at least for the x86 examples that I looked at, there does not appear to be any significant perf diff either way.
This commit is contained in:
parent
67d499445d
commit
0369714b31
|
@ -2936,6 +2936,19 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
|
|||
return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
|
||||
}
|
||||
|
||||
// If this is checking if all elements of an extended vector are clear or not,
|
||||
// compare in a narrow type to eliminate the extend:
|
||||
// icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
|
||||
Value *X;
|
||||
if (Cmp.isEquality() && C->isNullValue() && Bitcast->hasOneUse() &&
|
||||
match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
|
||||
if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
|
||||
Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
|
||||
Value *NewCast = Builder.CreateBitCast(X, NewType);
|
||||
return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
|
||||
}
|
||||
}
|
||||
|
||||
// Folding: icmp <pred> iN X, C
|
||||
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
|
||||
// and C is a splat of a K-bit pattern
|
||||
|
|
|
@ -585,9 +585,8 @@ define i1 @eq_cast_eq-1_use2(<2 x i4> %x, <2 x i4> %y, i2* %p) {
|
|||
|
||||
define i1 @ne_cast_sext(<3 x i1> %b) {
|
||||
; CHECK-LABEL: @ne_cast_sext(
|
||||
; CHECK-NEXT: [[E:%.*]] = sext <3 x i1> [[B:%.*]] to <3 x i8>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <3 x i8> [[E]] to i24
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i24 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[B:%.*]] to i3
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = sext <3 x i1> %b to <3 x i8>
|
||||
|
@ -598,9 +597,8 @@ define i1 @ne_cast_sext(<3 x i1> %b) {
|
|||
|
||||
define i1 @eq_cast_sext(<8 x i3> %b) {
|
||||
; CHECK-LABEL: @eq_cast_sext(
|
||||
; CHECK-NEXT: [[E:%.*]] = sext <8 x i3> [[B:%.*]] to <8 x i8>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[E]] to i64
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i3> [[B:%.*]] to i24
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i24 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = sext <8 x i3> %b to <8 x i8>
|
||||
|
@ -611,9 +609,8 @@ define i1 @eq_cast_sext(<8 x i3> %b) {
|
|||
|
||||
define i1 @ne_cast_zext(<4 x i1> %b) {
|
||||
; CHECK-LABEL: @ne_cast_zext(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <4 x i1> [[B:%.*]] to <4 x i8>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i8> [[E]] to i32
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[B:%.*]] to i4
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = zext <4 x i1> %b to <4 x i8>
|
||||
|
@ -624,9 +621,8 @@ define i1 @ne_cast_zext(<4 x i1> %b) {
|
|||
|
||||
define i1 @eq_cast_zext(<5 x i3> %b) {
|
||||
; CHECK-LABEL: @eq_cast_zext(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i35 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i3> [[B:%.*]] to i15
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i15 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = zext <5 x i3> %b to <5 x i7>
|
||||
|
@ -635,6 +631,8 @@ define i1 @eq_cast_zext(<5 x i3> %b) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; negative test - valid for eq/ne only
|
||||
|
||||
define i1 @sgt_cast_zext(<5 x i3> %b) {
|
||||
; CHECK-LABEL: @sgt_cast_zext(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
|
@ -648,6 +646,9 @@ define i1 @sgt_cast_zext(<5 x i3> %b) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; negative test - not valid with non-zero constants
|
||||
; TODO: We could handle some non-zero constants by checking for bit-loss after casts.
|
||||
|
||||
define i1 @eq7_cast_sext(<5 x i3> %b) {
|
||||
; CHECK-LABEL: @eq7_cast_sext(
|
||||
; CHECK-NEXT: [[E:%.*]] = sext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
|
@ -661,12 +662,14 @@ define i1 @eq7_cast_sext(<5 x i3> %b) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; extra use of extend is ok
|
||||
|
||||
define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
|
||||
; CHECK-LABEL: @eq_cast_zext_use1(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
; CHECK-NEXT: store <5 x i7> [[E]], <5 x i7>* [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i35 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i3> [[B]] to i15
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i15 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = zext <5 x i3> %b to <5 x i7>
|
||||
|
@ -676,6 +679,8 @@ define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; negative test - don't create an extra cast
|
||||
|
||||
declare void @use35(i35)
|
||||
|
||||
define i1 @eq_cast_zext_use2(<5 x i3> %b) {
|
||||
|
|
Loading…
Reference in New Issue