[SelectionDAG] ComputeKnownBits - merge getValidMinimumShiftAmountConstant() and generic ISD::SHL handling.

As mentioned by @nikic on rGef5debac4302, the two cases can be merged: first keep the trailing zero bits that are already known for the shifted value (they remain zero for any shift amount), and then, if a minimum shift amount is known, mark that many low bits as known zero as well.
2020-01-14 11:51:09 +00:00 · 2020-01-14 11:51:09 +00:00 · c05a11108b
parent bad6032bc1
commit c05a11108b
2 changed files with 14 additions and 13 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -2865,24 +2865,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
    break;
  }
  case ISD::SHL:
+    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
    if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
-      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
      unsigned Shift = ShAmt->getZExtValue();
      Known.Zero <<= Shift;
      Known.One <<= Shift;
      // Low bits are known zero.
      Known.Zero.setLowBits(Shift);
-    } else if (const APInt *ShMinAmt =
-                   getValidMinimumShiftAmountConstant(Op, DemandedElts)) {
-      // Minimum shift low bits are known zero.
-      Known.Zero.setLowBits(ShMinAmt->getZExtValue());
-    } else {
-      // No matter the shift amount, the trailing zeros will stay zero.
-      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-      Known.Zero =
-          APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
-      Known.One.clearAllBits();
+      break;
    }
+
+    // No matter the shift amount, the trailing zeros will stay zero.
+    Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
+    Known.One.clearAllBits();
+
+    // Minimum shift low bits are known zero.
+    if (const APInt *ShMinAmt =
+            getValidMinimumShiftAmountConstant(Op, DemandedElts))
+      Known.Zero.setLowBits(ShMinAmt->getZExtValue());
    break;
  case ISD::SRL:
    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@ -877,7 +877,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0)  {
 ; SSE2-NEXT:    pmuludq %xmm2, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    por {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
@ -894,7 +894,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0)  {
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
 ; AVX-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
  %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>