[InstCombine] Support vector splats in transformZExtICmp

This patch adds splat support to transformZExtICmp. The test cases are vector versions of tests that failed when commenting out parts of the existing scalar code. One test didn't optimize properly in its vector form due to another bug, so a TODO has been added. Differential Revision: https://reviews.llvm.org/D37253 llvm-svn: 312023
2017-08-29 18:58:13 +00:00 · 2017-08-29 18:58:13 +00:00 · 4431bfe88c
parent e7becd7e85
commit 4431bfe88c
4 changed files with 93 additions and 7 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@ -772,13 +772,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
  // If we are just checking for a icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
-  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+  const APInt *Op1CV;
-    const APInt &Op1CV = Op1C->getValue();
+  if (match(ICI->getOperand(1), m_APInt(Op1CV))) {
    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) ||
+    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
-        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
      if (!DoTransform) return ICI;
      Value *In = ICI->getOperand(0);
@ -804,7 +804,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) &&
+    if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
        // This only works for EQ and NE
        ICI->isEquality()) {
      // If Op1C some other power of two, convert:
@ -815,7 +815,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
        if (!DoTransform) return ICI;
        bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
-        if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) {
+        if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
          // (X&4) == 2 --> false
          // (X&4) != 2 --> true
          Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
@ -833,7 +833,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
                                  In->getName() + ".lobit");
        }
-        if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit.
+        if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
          Constant *One = ConstantInt::get(In->getType(), 1);
          In = Builder.CreateXor(In, One);
        }
--- a/llvm/test/Transforms/InstCombine/compare-signs.ll
+++ b/llvm/test/Transforms/InstCombine/compare-signs.ll
@ -48,6 +48,22 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
  ret i32 %t3
 }
 ; TODO: This should optimize but doesn't due to missing vector support in InstCombiner::foldICmpEquality.
 ; Vector variant of @test3: zero-extends an equality compare between the
 ; sign bits (lshr by 31) of two <2 x i32> values. The CHECK lines below
 ; currently expect the instruction sequence to come out unchanged.
 define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; CHECK-LABEL: @test3vec(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
 ; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 31, i32 31>
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
  %t0 = lshr <2 x i32> %a, <i32 31, i32 31>
  %t1 = lshr <2 x i32> %b, <i32 31, i32 31>
  %t2 = icmp eq <2 x i32> %t0, %t1
  %t3 = zext <2 x i1> %t2 to <2 x i32>
  ret <2 x i32> %t3
 }
 ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
 ; is one, not zero.
 define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@ -13,6 +13,16 @@ define i32 @test1(i32 %X) {
  ret i32 %b
 }
 ; Vector variant of @test1: zext of (X <s 0) with a splat-zero operand.
 ; Expected result is a single logical shift right by 31 that moves the
 ; sign bit into the low bit, with no compare or zext remaining.
 define <2 x i32> @test1vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test1vec(
 ; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
 ; CHECK-NEXT:    ret <2 x i32> [[X_LOBIT]]
 ;
  %a = icmp slt <2 x i32> %X, zeroinitializer
  %b = zext <2 x i1> %a to <2 x i32>
  ret <2 x i32> %b
 }
 define i32 @test2(i32 %X) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr i32 %X, 31
@ -24,6 +34,17 @@ define i32 @test2(i32 %X) {
  ret i32 %b
 }
 ; Vector variant of @test2: zext of (X <u splat(-2147483648)). The constant
 ; has only the sign bit set, so the compare is true exactly when the sign
 ; bit of X is clear. Expected result is lshr by 31 followed by xor with 1.
 define <2 x i32> @test2vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test2vec(
 ; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
 ; CHECK-NEXT:    [[X_LOBIT_NOT:%.*]] = xor <2 x i32> [[X_LOBIT]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[X_LOBIT_NOT]]
 ;
  %a = icmp ult <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
  %b = zext <2 x i1> %a to <2 x i32>
  ret <2 x i32> %b
 }
 define i32 @test3(i32 %X) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr i32 %X, 31
--- a/llvm/test/Transforms/InstCombine/set.ll
+++ b/llvm/test/Transforms/InstCombine/set.ll
@ -282,6 +282,17 @@ define i32 @test20(i32 %A) {
  ret i32 %D
 }
 ; Vector variant of @test20: zext of ((A & 1) != 0) with splat constants.
 ; The masked value is already 0 or 1, so the expected result is just the
 ; 'and' with the compare and zext removed.
 define <2 x i32> @test20vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test20vec(
 ; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
  %B = and <2 x i32> %A, <i32 1, i32 1>
  %C = icmp ne <2 x i32> %B, zeroinitializer
  %D = zext <2 x i1> %C to <2 x i32>
  ret <2 x i32> %D
 }
 define i32 @test21(i32 %a) {
 ; CHECK-LABEL: @test21(
 ; CHECK-NEXT:    [[TMP_6:%.*]] = lshr i32 %a, 2
@ -294,6 +305,18 @@ define i32 @test21(i32 %a) {
  ret i32 %retval
 }
 ; Vector variant of @test21: zext of ((a & 4) != 0) with splat constants.
 ; Expected result shifts the tested bit (bit 2) down to bit 0 with lshr by 2
 ; and masks it with 1, replacing the compare and zext.
 define <2 x i32> @test21vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test21vec(
 ; CHECK-NEXT:    [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP_6_LOBIT]]
 ;
  %tmp.6 = and <2 x i32> %a, <i32 4, i32 4>
  %not.tmp.7 = icmp ne <2 x i32> %tmp.6, zeroinitializer
  %retval = zext <2 x i1> %not.tmp.7 to <2 x i32>
  ret <2 x i32> %retval
 }
 define i1 @test22(i32 %A, i32 %X) {
 ; CHECK-LABEL: @test22(
 ; CHECK-NEXT:    ret i1 true
@ -318,6 +341,18 @@ define i32 @test23(i32 %a) {
  ret i32 %tmp.3
 }
 ; Vector variant of @test23: zext of ((a & 1) == 0) with splat constants.
 ; Expected result keeps the 'and' with 1 and inverts the low bit with an
 ; xor by 1, replacing the compare and zext.
 define <2 x i32> @test23vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test23vec(
 ; CHECK-NEXT:    [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[TMP_1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
  %tmp.1 = and <2 x i32> %a, <i32 1, i32 1>
  %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
  %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
  ret <2 x i32> %tmp.3
 }
 define i32 @test24(i32 %a) {
 ; CHECK-LABEL: @test24(
 ; CHECK-NEXT:    [[TMP_1:%.*]] = lshr i32 %a, 2
@ -332,6 +367,20 @@ define i32 @test24(i32 %a) {
  ret i32 %tmp.3
 }
 ; Vector variant of @test24: zext of (((a & 4) >> 2) == 0) with splat
 ; constants. Expected result is lshr by 2, 'and' with 1, then xor with 1 to
 ; invert the extracted bit, replacing the compare and zext.
 define <2 x i32> @test24vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test24vec(
 ; CHECK-NEXT:    [[TMP_1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP_1_LOBIT:%.*]] = and <2 x i32> [[TMP_1]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[TMP_1_LOBIT]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
  %tmp1 = and <2 x i32> %a, <i32 4, i32 4>
  %tmp.1 = lshr <2 x i32> %tmp1, <i32 2, i32 2>
  %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
  %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
  ret <2 x i32> %tmp.3
 }
 define i1 @test25(i32 %A) {
 ; CHECK-LABEL: @test25(
 ; CHECK-NEXT:    ret i1 false