[X86] SimplifyDemandedVectorEltsForTargetNode - add PSADBW handling

Peek through PSADBW operands to handle non-demanded elements.
This commit is contained in:
Simon Pilgrim 2021-09-16 11:28:17 +01:00
parent 005fc11ebd
commit 1ef62cb200
4 changed files with 37 additions and 16 deletions

View File

@ -39358,6 +39358,31 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero | RHSZero;
break;
}
// PSADBW: when only some result elements are demanded, try to replace either
// operand with a simpler value that is equivalent for the demanded elements.
case X86ISD::PSADBW: {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// Sanity-check the node shape: the result has i64 elements and both operands
// share one vector type with i8 elements.
assert(VT.getScalarType() == MVT::i64 &&
LHS.getValueType() == RHS.getValueType() &&
LHS.getValueType().getScalarType() == MVT::i8 &&
"Unexpected PSADBW types");
// Aggressively peek through ops to get at the demanded elts.
// Nothing to gain when every result element is demanded, so skip the work.
if (!DemandedElts.isAllOnesValue()) {
// Rescale the demanded mask from result-element granularity to the operand
// element count, so it can be applied to the i8 operands.
// NOTE(review): presumably each i64 result element maps onto the eight i8
// source elements occupying the same bits - confirm against ScaleBitMask.
unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
// Query each operand independently; a null SDValue is treated below as
// "no simplified replacement found" for that operand.
SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
LHS, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
RHS, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewLHS || NewRHS) {
// At least one operand was simplified: keep the original for the other
// one and rebuild the node with the same opcode and result type.
NewLHS = NewLHS ? NewLHS : LHS;
NewRHS = NewRHS ? NewRHS : RHS;
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
}
}
// No simplification applied; leave the switch.
break;
}
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA: {

View File

@ -17,19 +17,15 @@ define <2 x i64> @combine_psadbw_shift(<16 x i8> %0, <16 x i8> %1) {
define i64 @combine_psadbw_demandedelt(<16 x i8> %0, <16 x i8> %1) {
; X86-LABEL: combine_psadbw_demandedelt:
; X86: # %bb.0:
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; X86-NEXT: psadbw %xmm0, %xmm1
; X86-NEXT: movd %xmm1, %eax
; X86-NEXT: psadbw %xmm1, %xmm0
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: combine_psadbw_demandedelt:
; X64: # %bb.0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
; X64-NEXT: psadbw %xmm0, %xmm1
; X64-NEXT: movq %xmm1, %rax
; X64-NEXT: psadbw %xmm1, %xmm0
; X64-NEXT: movq %xmm0, %rax
; X64-NEXT: retq
%3 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
%4 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>

View File

@ -733,7 +733,7 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
%v1 = load <8 x i8>, <8 x i8>* %p, align 1

View File

@ -18,7 +18,7 @@ define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture reado
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
@ -60,7 +60,7 @@ define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture reado
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
entry:
@ -101,7 +101,7 @@ define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture reado
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
entry:
@ -142,7 +142,7 @@ define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture reado
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
entry:
@ -183,7 +183,7 @@ define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
@ -224,7 +224,7 @@ define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
@ -265,7 +265,7 @@ define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* noca
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry: