Fixes two issues in SimplifyDemandedBits of sext_in_reg:

1) We should not try to simplify if the sext has multiple uses.
2) There is no need to simplify if the source value is already sign-extended.

Patch by Gil Rapaport <gil.rapaport@intel.com>

Differential Revision: http://reviews.llvm.org/D6949

llvm-svn: 229659
commit af9befa6b7
parent b7e5909a42
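For context on the change below: when only the most significant bit of a sext_in_reg node is demanded, SimplifyDemandedBits rewrites it as a left shift that moves the small type's sign bit into the MSB. A minimal sketch in plain C++ (not LLVM code; the 16-bit/8-bit widths and the helper name are illustrative assumptions) that brute-forces the underlying bit identity:

// A minimal sketch, in plain C++ rather than LLVM code, of the identity the
// transform relies on: when only the MSB of sext_in_reg(x, SmallVT) is
// demanded, shifting the small type's sign bit into the MSB is equivalent.
#include <cassert>
#include <cstdint>

// sext_in_reg on a 16-bit value, sign-extending from its low 8 bits.
static uint16_t sextInReg16From8(uint16_t X) {
  return static_cast<uint16_t>(static_cast<int16_t>(static_cast<int8_t>(X)));
}

int main() {
  const unsigned BitWidth = 16, SmallBits = 8;
  for (uint32_t V = 0; V <= 0xFFFF; ++V) {
    uint16_t X = static_cast<uint16_t>(V);
    unsigned SextMsb = (sextInReg16From8(X) >> (BitWidth - 1)) & 1;
    unsigned ShlMsb =
        (static_cast<uint16_t>(X << (BitWidth - SmallBits)) >> (BitWidth - 1)) & 1;
    assert(SextMsb == ShlMsb && "MSB of sext_in_reg == MSB of shl");
  }
  return 0;
}

The two fixes in the commit gate exactly this rewrite.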
@@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
     // If we only care about the highest bit, don't bother shifting right.
-    if (MsbMask == DemandedMask) {
+    if (MsbMask == NewMask) {
       unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
       SDValue InOp = Op.getOperand(0);
+      unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+      bool AlreadySignExtended =
+        TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+      // However if the input is already sign extended we expect the sign
+      // extension to be dropped altogether later and do not simplify.
+      if (!AlreadySignExtended) {
+        // Compute the correct shift amount type, which must be getShiftAmountTy
+        // for scalar types after legalization.
+        EVT ShiftAmtTy = Op.getValueType();
+        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
 
-      // Compute the correct shift amount type, which must be getShiftAmountTy
-      // for scalar types after legalization.
-      EVT ShiftAmtTy = Op.getValueType();
-      if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
-        ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
-
-      SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
-                                            Op.getValueType(), InOp, ShiftAmt));
+        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+                                                 Op.getValueType(), InOp,
+                                                 ShiftAmt));
+      }
     }
 
     // Sign extension. Compute the demanded bits in the result that are not
     // present in the input.
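The new AlreadySignExtended guard asks ComputeNumSignBits whether the top VTBits - ShAmt + 1 bits of the input are already copies of the sign bit; if so, the sext_in_reg is a no-op that is expected to be dropped later, and emitting the shl would only pessimize. A hedged standalone check of that threshold (plain C++ with hypothetical helpers, using a 16-bit value extended from its low 8 bits):

#include <cassert>
#include <cstdint>

// Leading bits equal to the sign bit, counting the sign bit itself; this is
// what ComputeNumSignBits reports for a concrete 16-bit value.
static unsigned countSignBits16(uint16_t X) {
  unsigned SignBit = (X >> 15) & 1;
  unsigned N = 1;
  for (int Bit = 14; Bit >= 0 && (((X >> Bit) & 1) == SignBit); --Bit)
    ++N;
  return N;
}

// sext_in_reg from the low 8 bits of a 16-bit value.
static uint16_t sextInReg16From8(uint16_t X) {
  return static_cast<uint16_t>(static_cast<int16_t>(static_cast<int8_t>(X)));
}

int main() {
  const unsigned VTBits = 16, ShAmt = 8; // ShAmt: width being extended from
  for (uint32_t V = 0; V <= 0xFFFF; ++V) {
    uint16_t X = static_cast<uint16_t>(V);
    if (countSignBits16(X) >= VTBits - ShAmt + 1)
      // Already sign-extended: the in-register extension changes nothing.
      assert(sextInReg16From8(X) == X);
  }
  return 0;
}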
@@ -419,8 +419,8 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
 ;
 ; SSE41-LABEL: vsel_i648:
 ; SSE41:       # BB#0: # %entry
-; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
 ; SSE41-NEXT:    movaps %xmm5, %xmm1
 ; SSE41-NEXT:    movaps %xmm7, %xmm3
 ; SSE41-NEXT:    retq
@@ -59,19 +59,15 @@ bb:
 ;
 ; <rdar://problem/18819506>
 
-; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
-; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
-; even a faulty pattern would pass!
-;
 ; CHECK-LABEL: test3:
-; Compute the original mask.
-; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
-; Shrink the bit of the mask.
-; CHECK-NEXT: vpslld $31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
-; Use the shrunk mask in the blend.
-; CHECK-NEXT: vblendvps [[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the original mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}}
+; Compute the mask.
+; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
+; Do not shrink the bit of the mask.
+; CHECK-NOT: vpslld $31, [[MASK]], {{%xmm[0-9]+}}
+; Use the mask in the blend.
+; CHECK-NEXT: vblendvps [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Use the mask in the and.
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}}
 ; CHECK: retq
 define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
 %tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
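The rewritten test pins down the multiple-uses fix: the vpcmpeqd mask feeds both the blend, which reads only each lane's sign bit, and the and, which reads the whole lane. A mask shifted left by 31 satisfies the blend but is not a valid substitute at the and, so the original mask had to stay live and the old vpslld was a wasted instruction. A toy sketch of that demand mismatch (illustrative lane values, not LLVM's API):

#include <cassert>
#include <cstdint>

int main() {
  // One lane of the compare result, conceptually all-ones (true).
  uint32_t Mask = 0xFFFFFFFFu;

  // The blend demands only the lane's sign bit.
  bool BlendSelectsSecond = (Mask >> 31) & 1;

  // The and demands low bits of the same lane as well.
  uint32_t AndResult = Mask & 0x0000FFFFu;

  // The old rewrite (vpslld $31): fine for the blend in isolation...
  uint32_t ShiftedMask = Mask << 31;
  assert((((ShiftedMask >> 31) & 1) != 0) == BlendSelectsSecond);

  // ...but not a valid replacement at the and, whose demanded bits differ.
  assert((ShiftedMask & 0x0000FFFFu) != AndResult);
  return 0;
}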