[X86] LowerShiftByScalarVariable - find splat patterns with getSplatSourceVector instead of getSplatValue

This completes the removal of uses of SelectionDAG::getSplatValue started in D119090 - by avoiding extracting the splatted element we make it a lot easier to zero-extend the bottom 64 bits of the shift amount, and we fix issues we had on 32-bit targets where i64 isn't legal.

I've removed the old version of getTargetVShiftNode that took the scalar shift amount argument and LowerRotate can finally efficiently handle vXi16 rotates-by-scalar (using the same code as general funnel-shifts).

The only regression we see is in the X86-AVX2 PR52719 test case in vector-shift-ashr-256.ll - this is now hitting the same problem as the X86-AVX1 case (failure to simplify a multi-use X86ISD::VBROADCAST_LOAD), which I intend to address in a follow-up patch.
This commit is contained in:
Simon Pilgrim 2022-03-04 16:47:20 +00:00
parent 85c53c7092
commit 147cfcbef1
27 changed files with 621 additions and 994 deletions

View File

@ -25810,72 +25810,6 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
/// Handle vector element shifts where the shift amount may or may not be a
/// constant. Takes immediate version of shift as input.
///
/// Places the scalar shift amount \p ShAmt into the low 64 bits of a 128-bit
/// vector (zero-extended where the scalar is narrower than 64 bits), bitcasts
/// that vector to a 128-bit type with the same element type as \p VT, and
/// creates the shift node on \p SrcOp with it.
///
/// \param Opc       Shift opcode supplied by the caller; it is remapped with
///                  getTargetVShiftUniformOpcode(Opc, true) before use.
/// \param dl        Debug location used for the final node and for the
///                  build_vector fallback.
/// \param VT        Result type of the shift; its element type determines the
///                  type the shift-amount vector is bitcast to.
/// \param SrcOp     The value being shifted.
/// \param ShAmt     Scalar shift amount; must be i32 or i64 (asserted).
/// \param Subtarget Queried for SSE4.1 to choose how to zero-extend.
/// \param DAG       SelectionDAG in which all nodes are created.
/// \returns The node created as (Opc VT SrcOp, ShAmtVector).
/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
// Change opcode to non-immediate version.
Opc = getTargetVShiftUniformOpcode(Opc, true);
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
// +====================+============+=======================================+
// | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
// +====================+============+=======================================+
// | i64 | Yes, No | Use ShAmt as lowest elt |
// | i32 | Yes | zero-extend in-reg |
// | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
// | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
// | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
// +====================+============+=======================================+
// NOTE: the "i32 / Yes" row below only applies when ShAmt is itself an
// EXTRACT_VECTOR_ELT; any other i32 amount takes the build_vector fallback
// regardless of SSE4.1.
if (SVT == MVT::i64)
// An i64 amount already fills the low 64 bits: just move it into lane 0.
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
(ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
// Amount is zext(extract_vector_elt) of an i8/i16: drop the scalar
// zero-extend and redo the extension on the vector instead.
ShAmt = ShAmt.getOperand(0);
MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
if (Subtarget.hasSSE41())
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
else {
// No SSE4.1: byte-shift the whole register left and then right by the same
// amount (128 bits minus one element, expressed in bytes) so that only the
// lowest element remains - presumably with zeros shifted into all other
// bytes, per the "byte-shift-in-reg" row of the table above.
SDValue ByteShift = DAG.getTargetConstant(
(128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
ByteShift);
ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
ByteShift);
}
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// i32 amount extracted from a vector, SSE4.1 available: put it in lane 0 of
// a v4i32 and zero-extend that lane in-register to 64 bits.
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
} else {
// Fallback: build <ShAmt, 0, undef, undef>. The explicit zero in lane 1
// makes the low 64 bits equal to the zero-extended amount; the upper two
// lanes are left undefined.
SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
/// Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
@ -29341,22 +29275,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
unsigned Opcode = Op.getOpcode();
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
// TODO: Use getSplatSourceVector.
if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
int BaseShAmtIdx = -1;
if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,
Subtarget, DAG);
// vXi8 shifts - shift as v8i16 + mask result.
if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
(VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
@ -30217,11 +30141,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold as unpack(x,x) << zext(splat(y)):
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
// TODO: Handle vXi16 cases on all targets.
if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
(EltSizeInBits == 16 && !Subtarget.hasSSE41())) {
if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
int BaseRotAmtIdx = -1;
if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
if (EltSizeInBits == 16 && Subtarget.hasSSE41()) {
unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
}
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
@ -41560,12 +41486,8 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
switch (Opc) {
case X86ISD::VBROADCAST:
case X86ISD::VBROADCAST_LOAD:
// TODO: Permit vXi64 types on 32-bit targets.
if (isTypeLegal(Op.getValueType().getVectorElementType())) {
UndefElts = APInt::getNullValue(NumElts);
return true;
}
return false;
UndefElts = APInt::getNullValue(NumElts);
return true;
}
return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,

View File

@ -62,11 +62,11 @@ allocas:
define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval) nounwind {
; X86-LABEL: shiftInput___64in32bitmode:
; X86: # %bb.0: # %allocas
; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___64in32bitmode:
@ -87,11 +87,11 @@ allocas:
define <4 x i64> @shiftInput___2x32bitcast(<4 x i64> %input, i32 %shiftval) nounwind {
; X86-LABEL: shiftInput___2x32bitcast:
; X86: # %bb.0: # %allocas
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___2x32bitcast:

View File

@ -1156,60 +1156,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
}
define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: psrlw %xmm4, %xmm1
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE-LABEL: splatvar_funnnel_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: pandn %xmm3, %xmm4
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: psrlw %xmm4, %xmm1
; SSE-NEXT: pand %xmm3, %xmm2
; SSE-NEXT: psllw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: psrlw %xmm4, %xmm1
; SSE41-NEXT: pand %xmm3, %xmm2
; SSE41-NEXT: psllw %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@ -1220,7 +1192,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@ -1231,7 +1203,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@ -1252,7 +1224,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
@ -1267,41 +1239,25 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
; AVX512VLVBMI2-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
; XOP-LABEL: splatvar_funnnel_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psrlw $1, %xmm1
; X86-SSE2-NEXT: psrlw %xmm4, %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm2
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psllw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl

View File

@ -981,13 +981,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm5
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
@ -1000,52 +999,44 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
@ -1061,13 +1052,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
@ -1080,13 +1069,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT: vpsrlw $1, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
@ -1099,13 +1087,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq

View File

@ -544,49 +544,47 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm3
; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
@ -599,13 +597,11 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq

View File

@ -925,75 +925,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pandn %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlw $1, %xmm4
; SSE41-NEXT: psrlw %xmm3, %xmm4
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:

View File

@ -756,79 +756,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:

View File

@ -296,60 +296,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:

View File

@ -1249,60 +1249,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
}
define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pand %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw $1, %xmm0
; SSE2-NEXT: psllw %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE-LABEL: splatvar_funnnel_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: pand %xmm3, %xmm4
; SSE-NEXT: psrlw %xmm4, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm2
; SSE-NEXT: psllw $1, %xmm0
; SSE-NEXT: psllw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: psrlw %xmm4, %xmm1
; SSE41-NEXT: pandn %xmm3, %xmm2
; SSE41-NEXT: psllw $1, %xmm0
; SSE41-NEXT: psllw %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
@ -1313,7 +1285,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
@ -1324,7 +1296,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
@ -1345,7 +1317,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
@ -1361,40 +1333,24 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsllw $1, %xmm0, %xmm0
; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
; XOP-LABEL: splatvar_funnnel_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpsllw $1, %xmm0, %xmm0
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pand %xmm3, %xmm4
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psrlw %xmm4, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm2
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psllw $1, %xmm0
; X86-SSE2-NEXT: psllw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0

View File

@ -1014,12 +1014,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
@ -1033,12 +1032,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1046,12 +1043,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1059,12 +1054,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1072,12 +1065,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1094,12 +1085,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1114,12 +1103,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
@ -1133,12 +1121,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0

View File

@ -546,15 +546,13 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@ -566,15 +564,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@ -586,12 +582,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
@ -606,12 +600,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0

View File

@ -957,75 +957,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psrlw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlw %xmm3, %xmm4
; SSE41-NEXT: pandn %xmm2, %xmm1
; SSE41-NEXT: psllw $1, %xmm0
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:

View File

@ -790,79 +790,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5
; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $1, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:

View File

@ -294,60 +294,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm3, %ymm2, %ymm4
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:

View File

@ -914,75 +914,70 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; SSE41-LABEL: splatvar_rotate_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pandn %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlw $1, %xmm4
; SSE41-NEXT: psrlw %xmm3, %xmm4
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_rotate_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v8i16:

View File

@ -577,11 +577,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v4i64:
@ -749,79 +749,74 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_rotate_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_rotate_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v16i16:

View File

@ -309,60 +309,58 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512F-LABEL: splatvar_rotate_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v32i16:

View File

@ -992,19 +992,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psrad %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@ -1032,9 +1024,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: psrad %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@ -1044,19 +1034,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psraw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psraw %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psraw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@ -1085,8 +1067,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psraw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -737,8 +737,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -752,8 +752,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -779,8 +779,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -799,8 +799,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -814,8 +814,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -841,8 +841,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1099,9 +1099,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1109,17 +1108,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1127,32 +1123,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1160,9 +1150,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@ -1174,9 +1162,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1185,15 +1172,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1202,29 +1187,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1233,7 +1214,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@ -2210,7 +2190,8 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0

View File

@ -169,8 +169,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@ -245,9 +245,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@ -259,9 +257,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@ -270,7 +267,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -811,19 +811,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@ -851,9 +843,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: psrld %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@ -863,19 +853,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psrlw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@ -904,8 +886,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psrlw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -580,8 +580,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -595,8 +595,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -622,8 +622,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -642,8 +642,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -657,8 +657,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -684,8 +684,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -823,8 +823,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -838,8 +838,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -865,8 +865,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -886,9 +886,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -896,17 +895,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -914,32 +910,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -947,9 +937,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@ -961,9 +949,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -972,15 +959,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -989,29 +974,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -1020,7 +1001,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -133,8 +133,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@ -200,9 +200,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@ -214,9 +212,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@ -225,7 +222,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -718,19 +718,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: pslld %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@ -758,9 +750,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslld %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@ -770,19 +760,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psllw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@ -811,8 +793,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psllw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -510,8 +510,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -525,8 +525,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -552,8 +552,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -572,8 +572,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -587,8 +587,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -614,8 +614,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -748,8 +748,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -763,8 +763,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -790,8 +790,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -811,9 +811,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -821,17 +820,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -839,32 +835,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -872,9 +862,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@ -886,9 +874,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -897,15 +884,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -914,29 +899,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@ -945,7 +926,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -128,8 +128,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@ -193,9 +193,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@ -207,9 +205,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@ -218,7 +215,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

View File

@ -162,11 +162,11 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z, <8 x i32> %w) {
; AVX1-LABEL: blendv_split:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpslld %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpslld %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpslld %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpslld %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm1
@ -180,8 +180,8 @@ define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32
; AVX2-LABEL: blendv_split:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)