forked from OSchip/llvm-project
[X86] SimplifyDemandedVectorEltsForTargetNode - add PSADBW handling
Peek through the PSADBW operands and simplify them when only some of the result elements are demanded (i.e. to handle non-demanded elements).
This commit is contained in:
parent
005fc11ebd
commit
1ef62cb200
|
@ -39358,6 +39358,31 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
KnownZero = LHSZero | RHSZero;
|
||||
break;
|
||||
}
|
||||
case X86ISD::PSADBW: {
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
assert(VT.getScalarType() == MVT::i64 &&
|
||||
LHS.getValueType() == RHS.getValueType() &&
|
||||
LHS.getValueType().getScalarType() == MVT::i8 &&
|
||||
"Unexpected PSADBW types");
|
||||
|
||||
// Aggressively peek through ops to get at the demanded elts.
|
||||
if (!DemandedElts.isAllOnesValue()) {
|
||||
unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
|
||||
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
|
||||
SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
|
||||
LHS, DemandedSrcElts, TLO.DAG, Depth + 1);
|
||||
SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
|
||||
RHS, DemandedSrcElts, TLO.DAG, Depth + 1);
|
||||
if (NewLHS || NewRHS) {
|
||||
NewLHS = NewLHS ? NewLHS : LHS;
|
||||
NewRHS = NewRHS ? NewRHS : RHS;
|
||||
return TLO.CombineTo(
|
||||
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86ISD::VSHL:
|
||||
case X86ISD::VSRL:
|
||||
case X86ISD::VSRA: {
|
||||
|
|
|
@ -17,19 +17,15 @@ define <2 x i64> @combine_psadbw_shift(<16 x i8> %0, <16 x i8> %1) {
|
|||
define i64 @combine_psadbw_demandedelt(<16 x i8> %0, <16 x i8> %1) {
|
||||
; X86-LABEL: combine_psadbw_demandedelt:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
|
||||
; X86-NEXT: psadbw %xmm0, %xmm1
|
||||
; X86-NEXT: movd %xmm1, %eax
|
||||
; X86-NEXT: psadbw %xmm1, %xmm0
|
||||
; X86-NEXT: movd %xmm0, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_psadbw_demandedelt:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
|
||||
; X64-NEXT: psadbw %xmm0, %xmm1
|
||||
; X64-NEXT: movq %xmm1, %rax
|
||||
; X64-NEXT: psadbw %xmm1, %xmm0
|
||||
; X64-NEXT: movq %xmm0, %rax
|
||||
; X64-NEXT: retq
|
||||
%3 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
|
||||
%4 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
|
||||
|
|
|
@ -733,7 +733,7 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x
|
|||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
%v1 = load <8 x i8>, <8 x i8>* %p, align 1
|
||||
|
|
|
@ -18,7 +18,7 @@ define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture reado
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
|
||||
|
@ -60,7 +60,7 @@ define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture reado
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
@ -101,7 +101,7 @@ define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture reado
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
@ -142,7 +142,7 @@ define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture reado
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
@ -183,7 +183,7 @@ define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, %rax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
@ -224,7 +224,7 @@ define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, %rax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
@ -265,7 +265,7 @@ define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* noca
|
|||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq %xmm0, %rax
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue