[TargetLowering] SimplifyDemandedBits - add ANY_EXTEND_VECTOR_INREG support

Add 'lowest' demanded elt -> bitcast fold to all *_EXTEND_VECTOR_INREG cases.

Reapplies rL363856.

llvm-svn: 364311
This commit is contained in:
Simon Pilgrim 2019-06-25 13:25:57 +00:00
parent 7c1deeff4a
commit 1a18bb6f25
2 changed files with 18 additions and 3 deletions

View File

@ -1415,6 +1415,13 @@ bool TargetLowering::SimplifyDemandedBits(
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
@ -1446,12 +1453,21 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
case ISD::ANY_EXTEND: {
// TODO: Add ISD::ANY_EXTEND_VECTOR_INREG support.
case ISD::ANY_EXTEND:
case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,

View File

@ -663,7 +663,6 @@ define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: pcmpeqw {{.*}}(%rip), %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: psllq $56, %xmm0
; SSE41-NEXT: movl $32768, %eax # imm = 0x8000
; SSE41-NEXT: movq %rax, %xmm1