forked from OSchip/llvm-project
[InstCombine] reduce vector casting before icmp
There may be some generalizations (see test comments) of these patterns, but this should handle the cases motivated by https://llvm.org/PR51315 and https://llvm.org/PR51259. The backend may want to transform differently, but at least for the x86 examples that I looked at, there does not appear to be any significant perf diff either way.
This commit is contained in:
parent
67d499445d
commit
0369714b31
|
@ -2936,6 +2936,19 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
|
|||
return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
|
||||
}
|
||||
|
||||
// If this is checking if all elements of an extended vector are clear or not,
|
||||
// compare in a narrow type to eliminate the extend:
|
||||
// icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
|
||||
Value *X;
|
||||
if (Cmp.isEquality() && C->isNullValue() && Bitcast->hasOneUse() &&
|
||||
match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
|
||||
if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
|
||||
Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
|
||||
Value *NewCast = Builder.CreateBitCast(X, NewType);
|
||||
return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
|
||||
}
|
||||
}
|
||||
|
||||
// Folding: icmp <pred> iN X, C
|
||||
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
|
||||
// and C is a splat of a K-bit pattern
|
||||
|
|
|
@ -585,9 +585,8 @@ define i1 @eq_cast_eq-1_use2(<2 x i4> %x, <2 x i4> %y, i2* %p) {
|
|||
|
||||
define i1 @ne_cast_sext(<3 x i1> %b) {
|
||||
; CHECK-LABEL: @ne_cast_sext(
|
||||
; CHECK-NEXT: [[E:%.*]] = sext <3 x i1> [[B:%.*]] to <3 x i8>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <3 x i8> [[E]] to i24
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i24 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i1> [[B:%.*]] to i3
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i3 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = sext <3 x i1> %b to <3 x i8>
|
||||
|
@ -598,9 +597,8 @@ define i1 @ne_cast_sext(<3 x i1> %b) {
|
|||
|
||||
define i1 @eq_cast_sext(<8 x i3> %b) {
|
||||
; CHECK-LABEL: @eq_cast_sext(
|
||||
; CHECK-NEXT: [[E:%.*]] = sext <8 x i3> [[B:%.*]] to <8 x i8>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[E]] to i64
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i3> [[B:%.*]] to i24
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i24 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = sext <8 x i3> %b to <8 x i8>
|
||||
|
@ -611,9 +609,8 @@ define i1 @eq_cast_sext(<8 x i3> %b) {
|
|||
|
||||
define i1 @ne_cast_zext(<4 x i1> %b) {
|
||||
; CHECK-LABEL: @ne_cast_zext(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <4 x i1> [[B:%.*]] to <4 x i8>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i8> [[E]] to i32
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[B:%.*]] to i4
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = zext <4 x i1> %b to <4 x i8>
|
||||
|
@ -624,9 +621,8 @@ define i1 @ne_cast_zext(<4 x i1> %b) {
|
|||
|
||||
define i1 @eq_cast_zext(<5 x i3> %b) {
|
||||
; CHECK-LABEL: @eq_cast_zext(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i35 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i3> [[B:%.*]] to i15
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i15 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = zext <5 x i3> %b to <5 x i7>
|
||||
|
@ -635,6 +631,8 @@ define i1 @eq_cast_zext(<5 x i3> %b) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; negative test - valid for eq/ne only
|
||||
|
||||
define i1 @sgt_cast_zext(<5 x i3> %b) {
|
||||
; CHECK-LABEL: @sgt_cast_zext(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
|
@ -648,6 +646,9 @@ define i1 @sgt_cast_zext(<5 x i3> %b) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; negative test - not valid with non-zero constants
|
||||
; TODO: We could handle some non-zero constants by checking for bit-loss after casts.
|
||||
|
||||
define i1 @eq7_cast_sext(<5 x i3> %b) {
|
||||
; CHECK-LABEL: @eq7_cast_sext(
|
||||
; CHECK-NEXT: [[E:%.*]] = sext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
|
@ -661,12 +662,14 @@ define i1 @eq7_cast_sext(<5 x i3> %b) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; extra use of extend is ok
|
||||
|
||||
define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
|
||||
; CHECK-LABEL: @eq_cast_zext_use1(
|
||||
; CHECK-NEXT: [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
|
||||
; CHECK-NEXT: store <5 x i7> [[E]], <5 x i7>* [[P:%.*]], align 8
|
||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i35 [[BC]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i3> [[B]] to i15
|
||||
; CHECK-NEXT: [[R:%.*]] = icmp eq i15 [[TMP1]], 0
|
||||
; CHECK-NEXT: ret i1 [[R]]
|
||||
;
|
||||
%e = zext <5 x i3> %b to <5 x i7>
|
||||
|
@ -676,6 +679,8 @@ define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
|
|||
ret i1 %r
|
||||
}
|
||||
|
||||
; negative test - don't create an extra cast
|
||||
|
||||
declare void @use35(i35)
|
||||
|
||||
define i1 @eq_cast_zext_use2(<5 x i3> %b) {
|
||||
|
|
Loading…
Reference in New Issue