[X86][SSE] Relax IsSplatValue - remove the 'variable shift' limit on subtracts.

This means we don't use the per-lane variable shifts as much when we can cheaply use the older splat variable shifts instead.

llvm-svn: 347162
commit fec9f8657b (parent 40509997eb)
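For context, the trade-off the message describes can be modelled in scalar C++ (an illustrative sketch only, not the compiler code; the helper names srlVar/srlSplat are invented for the example): a per-lane variable shift such as VPSRLVQ shifts each lane by its own amount, while the older splat shifts such as VPSRLQ shift every lane by the single amount held in the low 64 bits of the count operand. When the amount vector is a splat, the two agree, so the cheaper splat form suffices.

#include <array>
#include <cassert>
#include <cstdint>

using Vec2 = std::array<uint64_t, 2>;

// Per-lane variable shift (models VPSRLVQ); amounts >= 64 produce 0.
static Vec2 srlVar(const Vec2 &V, const Vec2 &Amt) {
  Vec2 R{};
  for (int I = 0; I < 2; ++I)
    R[I] = Amt[I] >= 64 ? 0 : V[I] >> Amt[I];
  return R;
}

// Uniform shift by the low element only (models VPSRLQ).
static Vec2 srlSplat(const Vec2 &V, const Vec2 &Amt) {
  Vec2 R{};
  for (int I = 0; I < 2; ++I)
    R[I] = Amt[0] >= 64 ? 0 : V[I] >> Amt[0];
  return R;
}

int main() {
  Vec2 V{0x123456789ABCDEF0ULL, 0xFEDCBA9876543210ULL};
  Vec2 SplatAmt{13, 13}; // both lanes hold the same shift amount
  assert(srlVar(V, SplatAmt) == srlSplat(V, SplatAmt)); // identical results
  return 0;
}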
llvm/lib/Target/X86/X86ISelLowering.cpp:

@@ -23902,9 +23902,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
 }
 
 // Determine if V is a splat value, and return the scalar.
 // TODO - can we make this generic and move to SelectionDAG?
 static SDValue IsSplatValue(MVT VT, SDValue V, const SDLoc &dl,
-                            SelectionDAG &DAG, const X86Subtarget &Subtarget,
-                            unsigned Opcode) {
+                            SelectionDAG &DAG) {
   V = peekThroughEXTRACT_SUBVECTORs(V);
 
   // Check if this is a splat build_vector node.
@@ -23916,8 +23916,7 @@ static SDValue IsSplatValue(MVT VT, SDValue V, const SDLoc &dl,
   }
 
   // Check for SUB(SPLAT_BV, SPLAT) cases from rotate patterns.
-  if (V.getOpcode() == ISD::SUB &&
-      !SupportedVectorVarShift(VT, Subtarget, Opcode)) {
+  if (V.getOpcode() == ISD::SUB) {
     SDValue LHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(0));
     SDValue RHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(1));
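The relaxed check above relies on the fact that subtracting a splat from a splat build_vector is itself a splat, regardless of whether the target has fast variable shifts. A minimal scalar sketch of that reasoning (illustrative names such as getSplatValue here are invented for the example, not the LLVM API):

#include <array>
#include <cassert>
#include <cstdint>
#include <optional>

using Vec4 = std::array<uint32_t, 4>;

// Returns the splatted scalar if every lane of V equals lane 0.
static std::optional<uint32_t> getSplatValue(const Vec4 &V) {
  for (uint32_t Lane : V)
    if (Lane != V[0])
      return std::nullopt;
  return V[0];
}

int main() {
  Vec4 SplatBV{32, 32, 32, 32}; // splat build_vector of the element width
  Vec4 Amt{5, 5, 5, 5};         // splatted shift/rotate amount
  Vec4 Sub;
  for (int I = 0; I < 4; ++I)
    Sub[I] = SplatBV[I] - Amt[I]; // SUB(SPLAT_BV, SPLAT)
  assert(getSplatValue(Sub) == std::optional<uint32_t>(27)); // still a splat
  return 0;
}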
@@ -23969,7 +23968,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
 
   Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
 
-  if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+  if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG)) {
     if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
       MVT EltVT = VT.getVectorElementType();
       assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
@@ -24672,7 +24671,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
   // Rotate by splat - expand back to shifts.
   // TODO - legalizers should be able to handle this.
   if ((EltSizeInBits >= 16 || Subtarget.hasBWI()) &&
-      IsSplatValue(VT, Amt, DL, DAG, Subtarget, Opcode)) {
+      IsSplatValue(VT, Amt, DL, DAG)) {
     SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
     AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
     SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
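A standalone sanity check of the expansion LowerRotate emits above, rot = (x << n) | (x >> (bits - n)) (a sketch assuming C++20 for std::rotl, not the DAG code; shlSat/srlSat are invented helpers). x86 vector shifts yield zero once the amount reaches the element width, which is what keeps the n == 0 case correct; plain C++ shifts would be undefined there, so the helpers model that saturating behaviour explicitly.

#include <bit>
#include <cassert>
#include <cstdint>

// Models an x86 vector shift: amounts >= the bit width yield 0.
static uint16_t shlSat(uint16_t X, unsigned N) {
  return N >= 16 ? 0 : static_cast<uint16_t>(X << N);
}
static uint16_t srlSat(uint16_t X, unsigned N) {
  return N >= 16 ? 0 : static_cast<uint16_t>(X >> N);
}

int main() {
  const uint16_t X = 0xABCD;
  for (unsigned N = 0; N < 16; ++N) {
    // shl by N, srl by (bits - N), then OR: the rotate expansion.
    uint16_t Expanded = shlSat(X, N) | srlSat(X, 16 - N);
    assert(Expanded == std::rotl(X, static_cast<int>(N)));
  }
  return 0;
}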
llvm/test/CodeGen/X86/vector-rotate-128.ll:

@@ -689,7 +689,7 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [64,64]
 ; AVX2-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
 ; AVX2-NEXT:    vpsllq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -804,7 +804,8 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
 ; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
-; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
@@ -925,53 +926,17 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: splatvar_rotate_v8i16:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT:    vpbroadcastw %xmm1, %xmm1
-; AVX512F-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
-; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
-; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX512F-NEXT:    retq
-;
-; AVX512VL-LABEL: splatvar_rotate_v8i16:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT:    vpbroadcastw %xmm1, %xmm1
-; AVX512VL-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX512VL-NEXT:    retq
-;
-; AVX512BW-LABEL: splatvar_rotate_v8i16:
-; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT:    vpbroadcastw %xmm1, %xmm1
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpsllw %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT:    vzeroupper
-; AVX512BW-NEXT:    retq
-;
-; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
-; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %xmm1
-; AVX512VLBW-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
-; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512VLBW-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX512VLBW-NEXT:    retq
+; AVX512-LABEL: splatvar_rotate_v8i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT:    vpbroadcastw %xmm1, %xmm1
+; AVX512-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
+; AVX512-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpor %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_rotate_v8i16:
 ; XOPAVX1:       # %bb.0:
llvm/test/CodeGen/X86/vector-rotate-256.ll:

@@ -522,10 +522,10 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm2
-; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [64,64,64,64]
-; AVX2-NEXT:    vpsubq %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm1
-; AVX2-NEXT:    vpsrlvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [64,64]
+; AVX2-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -606,9 +606,10 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm2
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm1
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32]
-; AVX2-NEXT:    vpsubd %ymm2, %ymm3, %ymm2
-; AVX2-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
+; AVX2-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; AVX2-NEXT:    vpsrld %xmm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -697,52 +698,17 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: splatvar_rotate_v16i16:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastw %xmm1, %ymm2
-; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512F-NEXT:    vpsllw %xmm1, %ymm0, %ymm1
-; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512F-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512F-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT:    vpor %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT:    retq
-;
-; AVX512VL-LABEL: splatvar_rotate_v16i16:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastw %xmm1, %ymm2
-; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm1
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VL-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm0, %ymm1, %ymm0
-; AVX512VL-NEXT:    retq
-;
-; AVX512BW-LABEL: splatvar_rotate_v16i16:
-; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT:    vpbroadcastw %xmm1, %ymm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT:    vpsubw %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm2
-; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT:    vpor %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT:    retq
-;
-; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
-; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %ymm2
-; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT:    vpsllw %xmm1, %ymm0, %ymm1
-; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT:    vpsubw %ymm2, %ymm3, %ymm2
-; AVX512VLBW-NEXT:    vpsrlvw %ymm2, %ymm0, %ymm0
-; AVX512VLBW-NEXT:    vpor %ymm0, %ymm1, %ymm0
-; AVX512VLBW-NEXT:    retq
+; AVX512-LABEL: splatvar_rotate_v16i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpbroadcastw %xmm1, %ymm2
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT:    vpsllw %xmm1, %ymm0, %ymm1
+; AVX512-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; AVX512-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_rotate_v16i16:
 ; XOPAVX1:       # %bb.0:
llvm/test/CodeGen/X86/vector-rotate-512.ll:

@@ -344,22 +344,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512BW-LABEL: splatvar_rotate_v32i16:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpbroadcastw %xmm1, %zmm2
-; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT:    vpsubw %zmm2, %zmm3, %zmm2
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; AVX512BW-NEXT:    vpsrlw %xmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %zmm2
-; AVX512VLBW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT:    vpsubw %zmm2, %zmm3, %zmm2
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsllw %xmm1, %zmm0, %zmm1
-; AVX512VLBW-NEXT:    vpsrlvw %zmm2, %zmm0, %zmm0
+; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX512VLBW-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
+; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
+; AVX512VLBW-NEXT:    vpsrlw %xmm2, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
 ; AVX512VLBW-NEXT:    retq
   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer