Fixes two issues in SimplifyDemandedBits of sext_in_reg:

1) We should not try to simplify if the sext has multiple uses. 2) There is no need to simplify if the source value is already sign-extended. Patch by Gil Rapaport <gil.rapaport@intel.com>. Differential Revision: http://reviews.llvm.org/D6949 llvm-svn: 229659
2015-02-18 09:43:40 +00:00 · 2015-02-18 09:43:40 +00:00 · af9befa6b7
parent b7e5909a42
commit af9befa6b7
3 changed files with 27 additions and 24 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
    APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
    // If we only care about the highest bit, don't bother shifting right.
-    if (MsbMask == DemandedMask) {
+    if (MsbMask == NewMask) {
      unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
      SDValue InOp = Op.getOperand(0);
      unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
      bool AlreadySignExtended =
        TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = Op.getValueType();
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-      // Compute the correct shift amount type, which must be getShiftAmountTy
+        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
-      // for scalar types after legalization.
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
-      EVT ShiftAmtTy = Op.getValueType();
+                                                 Op.getValueType(), InOp,
-      if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+                                                 ShiftAmt));
-        ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+      }
      SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
                                            Op.getValueType(), InOp, ShiftAmt));
    }
    // Sign extension.  Compute the demanded bits in the result that are not
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@ -419,8 +419,8 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
 ;
 ; SSE41-LABEL: vsel_i648:
 ; SSE41:       # BB#0: # %entry
-; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSE41-NEXT:    movaps %xmm5, %xmm1
 ; SSE41-NEXT:    movaps %xmm7, %xmm3
 ; SSE41-NEXT:    retq
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@ -59,19 +59,15 @@ bb:
 ; 
 ; <rdar://problem/18819506>
 ; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
 ; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
 ; even a faulty pattern would pass!
 ;  
 ; CHECK-LABEL: test3:
-; Compute the original mask.
+; Compute the mask.
-;	CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
+;	CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
-; Shrink the bit of the mask.
+; Do not shrink the bit of the mask.
-; CHECK-NEXT: vpslld	$31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
+; CHECK-NOT: vpslld	$31, [[MASK]], {{%xmm[0-9]+}}
-; Use the shrunk mask in the blend.
+; Use the mask in the blend.
-; CHECK-NEXT:	vblendvps	[[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK-NEXT:	vblendvps	[[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the original mask in the and.
+; Use the mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}} 
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}} 
 ; CHECK: retq
 define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17,  <4 x i16> %tmp3, <4 x i16> %tmp12) {
  %tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>