forked from OSchip/llvm-project
[X86][XOP] Add SimplifyDemandedVectorElts handling for XOP shifts
Noticed while investigating how to improve funnel-shift codegen.
This commit is contained in:
parent
13362abf3d
commit
c2426fdcae
|
@ -40082,6 +40082,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
|
||||
break;
|
||||
}
|
||||
case X86ISD::VPSHA:
|
||||
case X86ISD::VPSHL: {
|
||||
APInt LHSUndef, LHSZero;
|
||||
APInt RHSUndef, RHSZero;
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
|
||||
Depth + 1))
|
||||
return true;
|
||||
if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
|
||||
Depth + 1))
|
||||
return true;
|
||||
KnownZero = LHSZero;
|
||||
break;
|
||||
}
|
||||
case X86ISD::KSHIFTL: {
|
||||
SDValue Src = Op.getOperand(0);
|
||||
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
|
||||
|
|
|
@ -675,10 +675,11 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
|
|||
;
|
||||
; XOP-LABEL: combine_vec_udiv_nonuniform4:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
|
||||
; XOP-NEXT: movl $171, %eax
|
||||
; XOP-NEXT: vmovd %eax, %xmm1
|
||||
; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
|
||||
; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; XOP-NEXT: movl $249, %eax
|
||||
; XOP-NEXT: vmovd %eax, %xmm2
|
||||
; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; CHECK-LABEL: demandedelts_vpshab:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
|
@ -18,7 +17,6 @@ define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
|
|||
define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: demandedelts_vpshld:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
|
||||
; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; CHECK-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue