[SelectionDAG] ComputeKnownBits - merge getValidMinimumShiftAmountConstant() and generic ISD::SHL handling.

As mentioned by @nikic on rGef5debac4302, we can always merge in the guaranteed bottom zero bits from the shifted value, and then, if a minimum shift amount is known, mark the bottom bits as known zero as well.
Simon Pilgrim 2020-01-14 11:51:09 +00:00
parent bad6032bc1
commit c05a11108b
2 changed files with 14 additions and 13 deletions

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -2865,24 +2865,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::SHL:
+    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
-      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
       unsigned Shift = ShAmt->getZExtValue();
       Known.Zero <<= Shift;
       Known.One <<= Shift;
       // Low bits are known zero.
       Known.Zero.setLowBits(Shift);
-    } else if (const APInt *ShMinAmt =
-                   getValidMinimumShiftAmountConstant(Op, DemandedElts)) {
-      // Minimum shift low bits are known zero.
-      Known.Zero.setLowBits(ShMinAmt->getZExtValue());
-    } else {
-      // No matter the shift amount, the trailing zeros will stay zero.
-      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-      Known.Zero =
-          APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
-      Known.One.clearAllBits();
+      break;
     }
+
+    // No matter the shift amount, the trailing zeros will stay zero.
+    Known.Zero =
+        APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
+    Known.One.clearAllBits();
+
+    // Minimum shift low bits are known zero.
+    if (const APInt *ShMinAmt =
+            getValidMinimumShiftAmountConstant(Op, DemandedElts))
+      Known.Zero.setLowBits(ShMinAmt->getZExtValue());
     break;
   case ISD::SRL:
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
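To see concretely what the merged rule computes, here is a minimal standalone model (plain C++, for illustration only; the Known struct and computeShlKnownBits are hypothetical stand-ins, not the llvm::KnownBits API). Note that the two fallback facts are combined as a union of low-bit masks, i.e. effectively max(trailing zeros, minimum shift amount), not their sum.

#include <cassert>
#include <cstdint>
#include <optional>

// Simplified 32-bit known-bits model: bit i of Zero/One is set when result
// bit i is known to be 0/1 respectively.
struct Known {
  uint32_t Zero = 0;
  uint32_t One = 0;
};

// Guaranteed trailing zeros = number of trailing ones in the Zero mask.
static unsigned countMinTrailingZeros(const Known &K) {
  unsigned N = 0;
  while (N < 32 && ((K.Zero >> N) & 1))
    ++N;
  return N;
}

// Mask of the low N bits (N <= 32).
static uint32_t lowBitsSet(unsigned N) {
  return N >= 32 ? ~0u : (1u << N) - 1;
}

// Known bits of (Op0 << amount), mirroring the merged ISD::SHL handling:
// use the exact constant amount when available, otherwise keep the
// operand's trailing zeros and widen them by any known minimum amount.
static Known computeShlKnownBits(Known Op0, std::optional<unsigned> ExactAmt,
                                 std::optional<unsigned> MinAmt) {
  Known K = Op0; // known bits of the shifted value
  if (ExactAmt && *ExactAmt < 32) {
    K.Zero <<= *ExactAmt;
    K.One <<= *ExactAmt;
    K.Zero |= lowBitsSet(*ExactAmt); // low bits are known zero
    return K;
  }
  // No matter the shift amount, the trailing zeros will stay zero.
  K.Zero = lowBitsSet(countMinTrailingZeros(K));
  K.One = 0;
  // Minimum shift low bits are known zero.
  if (MinAmt)
    K.Zero |= lowBitsSet(*MinAmt);
  return K;
}

int main() {
  // Exact shift by 4 of a value whose bit 0 is known one.
  Known A = computeShlKnownBits(Known{0x0u, 0x1u}, 4, std::nullopt);
  assert(A.One == 0x10u && (A.Zero & 0xFu) == 0xFu);

  // Unknown amount that is at least 8, applied to a value with 4 known
  // trailing zeros: the low max(4, 8) = 8 bits are known zero.
  Known B = computeShlKnownBits(Known{0xFu, 0x0u}, std::nullopt, 8);
  assert((B.Zero & 0xFFu) == 0xFFu && B.One == 0u);
  return 0;
}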

llvm/test/CodeGen/X86/combine-add.ll

@@ -877,7 +877,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
 ; SSE2-NEXT: pmuludq %xmm2, %xmm1
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
+; SSE2-NEXT: por {{.*}}(%rip), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
@@ -894,7 +894,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
 ; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
   %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
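These test changes show the payoff. After the and with 4294901760 (0xFFFF0000) the value has at least 16 trailing zeros, but the non-uniform shift amounts <2, 3, 4, 5> defeat getValidShiftAmountConstant(), and the old min-shift-amount branch alone only proved 2 known-zero low bits. With the merged handling the operand's 16 trailing zeros survive, so the combiner can prove that adding 15 never carries into the set bits and rewrites the add as an or, turning paddd/vpaddd into por/vpor. A self-contained sanity check of that add-to-or equivalence (plain C++, illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  // Model one vector lane: clear the low 16 bits, shift left by 2..5, and
  // verify that adding 15 into the known-zero low bits equals a bitwise or.
  for (uint32_t X = 0; X < (1u << 24); X += 4097) {
    uint32_t Masked = X & 0xFFFF0000u; // at least 16 trailing zeros
    for (uint32_t Shift = 2; Shift <= 5; ++Shift) {
      uint32_t V = Masked << Shift;    // trailing zeros are preserved
      assert(V + 15u == (V | 15u));    // no carry, so add == or
    }
  }
  return 0;
}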