[X86] LowerShiftByScalarVariable - find splat patterns with getSplatSourceVector instead of getSplatValue
This completes the removal of uses of SelectionDAG::getSplatValue started in D119090 - by avoiding extracting the splatted element we make it a lot easier to zero-extend the bottom 64 bits of the shift amount, and we fix the issues we had on 32-bit targets where i64 isn't legal.

I've removed the old version of getTargetVShiftNode that took the scalar shift amount argument, and LowerRotate can finally handle vXi16 rotates-by-scalar efficiently (using the same code as general funnel shifts).

The only regression we see is in the X86-AVX2 PR52719 test case in vector-shift-ashr-256.ll - this now hits the same problem as the X86-AVX1 case (failure to simplify a multi-use X86ISD::VBROADCAST_LOAD), which I intend to address in a follow-up patch.
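To illustrate the pattern this targets, here is a hand-written sketch in the style of the affected tests (not a test from this patch): a shift whose amount is the splat of one element. getSplatValue had to extract that element as a scalar - an i64, which is illegal on 32-bit targets - whereas getSplatSourceVector leaves the amount in a vector and just reports the splat index.

```llvm
; Shift every lane by lane 0 of %amt. On i686 the extracted i64 scalar is
; illegal; keeping the amount as a vector plus a splat index avoids that.
define <4 x i64> @shift_by_splat(<4 x i64> %x, <4 x i64> %amt) {
  %splat = shufflevector <4 x i64> %amt, <4 x i64> poison, <4 x i32> zeroinitializer
  %r = lshr <4 x i64> %x, %splat
  ret <4 x i64> %r
}
```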
commit 147cfcbef1
parent 85c53c7092
@@ -25810,72 +25810,6 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}

/// Handle vector element shifts where the shift amount may or may not be a
/// constant. Takes immediate version of shift as input.
/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
                                   SDValue SrcOp, SDValue ShAmt,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  MVT SVT = ShAmt.getSimpleValueType();
  assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");

  // Change opcode to non-immediate version.
  Opc = getTargetVShiftUniformOpcode(Opc, true);

  // Need to build a vector containing shift amount.
  // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
  // +====================+============+=======================================+
  // | ShAmt is           | HasSSE4.1? | Construct ShAmt vector as             |
  // +====================+============+=======================================+
  // | i64                | Yes, No    | Use ShAmt as lowest elt               |
  // | i32                | Yes        | zero-extend in-reg                    |
  // | (i32 zext(i16/i8)) | Yes        | zero-extend in-reg                    |
  // | (i32 zext(i16/i8)) | No         | byte-shift-in-reg                     |
  // | i16/i32            | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |
  // +====================+============+=======================================+

  if (SVT == MVT::i64)
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
  else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
           ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
            ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
    ShAmt = ShAmt.getOperand(0);
    MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
    if (Subtarget.hasSSE41())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                          MVT::v2i64, ShAmt);
    else {
      SDValue ByteShift = DAG.getTargetConstant(
          (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
      ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
      ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
      ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
    }
  } else if (Subtarget.hasSSE41() &&
             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
    ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                        MVT::v2i64, ShAmt);
  } else {
    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
                        DAG.getUNDEF(SVT)};
    ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
  }

  // The return type has to be a 128-bit type with the same element
  // type as the input type.
  MVT EltVT = VT.getVectorElementType();
  MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());

  ShAmt = DAG.getBitcast(ShVT, ShAmt);
  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}

/// Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
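For reference, the kind of IR that used to reach the scalar-amount version above via the "(i32 zext(i16/i8))" table rows is a lane extract rebroadcast as a uniform shift count - a hand-written sketch, not a test from this patch:

```llvm
; Uniform shift whose count is lane 0 of %a; the lowering only needs the
; bottom 64 bits of the count vector to be correctly zero-extended.
define <8 x i16> @shift_by_lane0(<8 x i16> %x, <8 x i16> %a) {
  %e = extractelement <8 x i16> %a, i64 0
  %ins = insertelement <8 x i16> poison, i16 %e, i64 0
  %amt = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %r = lshr <8 x i16> %x, %amt
  ret <8 x i16> %r
}
```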
@@ -29341,22 +29275,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
  unsigned Opcode = Op.getOpcode();
  unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);

  // TODO: Use getSplatSourceVector.
  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
    if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
      MVT EltVT = VT.getVectorElementType();
      assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
      if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
      else if (EltVT.bitsLT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
    }
  }

  int BaseShAmtIdx = -1;
  if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
    if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))
      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,
                                 Subtarget, DAG);

    // vXi8 shifts - shift as v8i16 + mask result.
    if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
         (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
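The "shift as v8i16 + mask result" comment above is the standard byte-shift identity; a minimal hand-written sketch of it for a fixed amount of 5 (not from this patch):

```llvm
; shl <16 x i8> %x, 5 done as a v8i16 shift: the wide shift lets low bytes
; bleed into their neighbours, and masking with 0xE0 (0xFF << 5) clears that.
define <16 x i8> @shl_v16i8_by_5(<16 x i8> %x) {
  %w = bitcast <16 x i8> %x to <8 x i16>
  %s = shl <8 x i16> %w, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  %b = bitcast <8 x i16> %s to <16 x i8>
  %r = and <16 x i8> %b, <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32,
                          i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32,
                          i8 -32, i8 -32, i8 -32, i8 -32>
  ret <16 x i8> %r
}
```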
@@ -30217,11 +30141,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
  // Attempt to fold as unpack(x,x) << zext(splat(y)):
  // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
  // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
  // TODO: Handle vXi16 cases on all targets.
  if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
      (EltSizeInBits == 16 && !Subtarget.hasSSE41())) {
  if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
    int BaseRotAmtIdx = -1;
    if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
      if (EltSizeInBits == 16 && Subtarget.hasSSE41()) {
        unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
        return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
      }
      unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
      SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
      SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
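vXi16 rotates-by-scalar are now forwarded to the funnel-shift lowering: a rotate is a funnel shift with both inputs equal, so the splatvar rotate tests below reduce to the same code as @llvm.fshl. An illustrative function mirroring those tests (hypothetical, not from this patch):

```llvm
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

; rotl(x, a) == fshl(x, x, a) for a uniform 16-bit amount; the splat is
; recognized by getSplatSourceVector without extracting the scalar.
define <8 x i16> @splatvar_rotl_v8i16(<8 x i16> %x, i16 %a) {
  %ins = insertelement <8 x i16> poison, i16 %a, i64 0
  %amt = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %r = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %amt)
  ret <8 x i16> %r
}
```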
@@ -41560,12 +41486,8 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
  switch (Opc) {
  case X86ISD::VBROADCAST:
  case X86ISD::VBROADCAST_LOAD:
    // TODO: Permit vXi64 types on 32-bit targets.
    if (isTypeLegal(Op.getValueType().getVectorElementType())) {
      UndefElts = APInt::getNullValue(NumElts);
      return true;
    }
    return false;
    UndefElts = APInt::getNullValue(NumElts);
    return true;
  }

  return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
@@ -62,11 +62,11 @@ allocas:
define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval) nounwind {
; X86-LABEL: shiftInput___64in32bitmode:
; X86: # %bb.0: # %allocas
; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___64in32bitmode:

@@ -87,11 +87,11 @@ allocas:
define <4 x i64> @shiftInput___2x32bitcast(<4 x i64> %input, i32 %shiftval) nounwind {
; X86-LABEL: shiftInput___2x32bitcast:
; X86: # %bb.0: # %allocas
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___2x32bitcast:
@@ -1156,60 +1156,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
}

define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: psrlw %xmm4, %xmm1
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE-LABEL: splatvar_funnnel_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: pandn %xmm3, %xmm4
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: psrlw %xmm4, %xmm1
; SSE-NEXT: pand %xmm3, %xmm2
; SSE-NEXT: psllw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: psrlw %xmm4, %xmm1
; SSE41-NEXT: pand %xmm3, %xmm2
; SSE41-NEXT: psllw %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1

@@ -1220,7 +1192,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1

@@ -1231,7 +1203,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1

@@ -1252,7 +1224,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1

@@ -1267,41 +1239,25 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
; AVX512VLVBMI2-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
; XOP-LABEL: splatvar_funnnel_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psrlw $1, %xmm1
; X86-SSE2-NEXT: psrlw %xmm4, %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm2
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psllw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
@@ -981,13 +981,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm5
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3

@@ -1000,52 +999,44 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq

@@ -1061,13 +1052,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq

@@ -1080,13 +1069,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT: vpsrlw $1, %xmm5, %xmm5
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3

@@ -1099,13 +1087,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
@@ -544,49 +544,47 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm3
; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq

@@ -599,13 +597,11 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
@@ -925,75 +925,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pandn %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlw $1, %xmm4
; SSE41-NEXT: psrlw %xmm3, %xmm4
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@@ -756,79 +756,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
@@ -296,60 +296,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
@@ -1249,60 +1249,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
}

define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pand %xmm3, %xmm4
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm2
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw $1, %xmm0
; SSE2-NEXT: psllw %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE-LABEL: splatvar_funnnel_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: pand %xmm3, %xmm4
; SSE-NEXT: psrlw %xmm4, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm2
; SSE-NEXT: psllw $1, %xmm0
; SSE-NEXT: psllw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: psrlw %xmm4, %xmm1
; SSE41-NEXT: pandn %xmm3, %xmm2
; SSE41-NEXT: psllw $1, %xmm0
; SSE41-NEXT: psllw %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2

@@ -1313,7 +1285,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2

@@ -1324,7 +1296,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2

@@ -1345,7 +1317,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2

@@ -1361,40 +1333,24 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsllw $1, %xmm0, %xmm0
; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
; XOP-LABEL: splatvar_funnnel_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpsllw $1, %xmm0, %xmm0
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pand %xmm3, %xmm4
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psrlw %xmm4, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm2
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE2-NEXT: psllw $1, %xmm0
; X86-SSE2-NEXT: psllw %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
@ -1014,12 +1014,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
@ -1033,12 +1032,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1046,12 +1043,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1059,12 +1054,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1072,12 +1065,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1094,12 +1085,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -1114,12 +1103,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15]
; XOPAVX1-NEXT: # xmm3 = mem[0,0]
; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT: vpsllw $1, %xmm3, %xmm3
; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3
@ -1133,12 +1121,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
@ -546,15 +546,13 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@ -566,15 +564,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@ -586,12 +582,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
@ -606,12 +600,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
@ -957,75 +957,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psrlw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlw %xmm3, %xmm4
; SSE41-NEXT: pandn %xmm2, %xmm1
; SSE41-NEXT: psllw $1, %xmm0
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_funnnel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
@ -790,79 +790,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5
; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $1, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
@ -294,60 +294,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm3, %ymm2, %ymm4
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16:
@ -914,75 +914,70 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; SSE41-LABEL: splatvar_rotate_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
; SSE41-NEXT: psubw %xmm1, %xmm2
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pandn %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psrlw $1, %xmm4
; SSE41-NEXT: psrlw %xmm3, %xmm4
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_rotate_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v8i16:
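The splatvar_rotate tests are the rotate special case of the same lowering: a rotate is a funnel shift whose two inputs are equal. A minimal sketch (hypothetical function name, assuming the usual intrinsic form):

define <8 x i16> @sketch_rotl_splat(<8 x i16> %a, <8 x i16> %b) {
  ; Broadcast the rotate amount from element 0 of %b.
  %splat = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  ; rotl(a, n) is fshl with both funnel inputs equal to %a.
  %rot = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %splat)
  ret <8 x i16> %rot
}
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

The new asm above splits this as (a << (n & 15)) | ((a >> 1) >> (~n & 15)): pre-shifting right by one and then shifting by the complemented amount keeps both hardware shift counts in range, since a single right shift by 16 - n would be out of range when n is 0.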
@ -577,11 +577,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v4i64:
@ -749,79 +749,74 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_rotate_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16]
; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2
; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_rotate_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v16i16:
@ -309,60 +309,58 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512F-LABEL: splatvar_rotate_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_rotate_v32i16:
@ -992,19 +992,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}

define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psrad %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@ -1032,9 +1024,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: psrad %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@ -1044,19 +1034,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}

define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psraw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psraw %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psraw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@ -1085,8 +1067,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psraw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
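The modulo-shift tests check that an explicit and of the shift amount with bits-1 folds into the splatted-shift lowering instead of producing a separate mask and element extract. A minimal sketch of the v8i16 case (hypothetical function name, built around the %mod line shown above):

define <8 x i16> @sketch_modulo_ashr_splat(<8 x i16> %a, <8 x i16> %b) {
  ; Reduce the shift amount modulo the bit width (16), so it is always in range.
  %mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ; Broadcast lane 0 of the masked amount and do a uniform arithmetic shift.
  %splat = shufflevector <8 x i16> %mod, <8 x i16> poison, <8 x i32> zeroinitializer
  %shr = ashr <8 x i16> %a, %splat
  ret <8 x i16> %shr
}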
@ -737,8 +737,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
|
|||
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
|
||||
; AVX1-LABEL: splatvar_shift_v8i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -752,8 +752,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
|
|||
;
|
||||
; XOPAVX1-LABEL: splatvar_shift_v8i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -779,8 +779,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
|
|||
;
|
||||
; X86-AVX1-LABEL: splatvar_shift_v8i32:
|
||||
; X86-AVX1: # %bb.0:
|
||||
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
|
||||
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -799,8 +799,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
|
|||
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
|
||||
; AVX1-LABEL: splatvar_shift_v16i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -814,8 +814,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; XOPAVX1-LABEL: splatvar_shift_v16i16:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -841,8 +841,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
|
|||
;
|
||||
; X86-AVX1-LABEL: splatvar_shift_v16i16:
|
||||
; X86-AVX1: # %bb.0:
|
||||
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
|
||||
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -1099,9 +1099,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
|
|||
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
|
||||
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -1109,17 +1108,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
|
|||
;
|
||||
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
|
@ -1127,32 +1123,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
|
|||
;
|
||||
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1160,9 +1150,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -1174,9 +1162,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1185,15 +1172,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1202,29 +1187,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1233,7 +1214,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -2210,7 +2190,8 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
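
Every splatvar_modulo test above compiles the same IR shape: the shift-amount vector is masked to the element bit width, lane 0 is splatted, and all lanes of the source are shifted by that uniform amount. A minimal sketch of that IR (the function name is illustrative; the %mod line matches the checks above, and splat-via-shufflevector is the usual way these tests express a uniform amount):

; Mask each amount into [0,31] so the shift never reaches the bit width,
; then broadcast lane 0 and shift every lane by it.
define <8 x i32> @modulo_splat_ashr_sketch(<8 x i32> %a, <8 x i32> %b) nounwind {
  %mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %splat = shufflevector <8 x i32> %mod, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

On the PR52719 output just above: the ymm2 constant [0,2147483648,...] is the per-lane i64 sign bit (2^63 printed as two i32 halves). With no variable vXi64 arithmetic shift available here, the backend uses the standard identity ashr(x, c) == ((x lshr c) xor m) - m with m = signbit lshr c; the xor/sub tail falls outside this hunk.
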
@@ -169,8 +169,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -245,9 +245,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -259,9 +257,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -270,7 +267,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

@@ -811,19 +811,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}

define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psrld %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@@ -851,9 +843,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: psrld %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -863,19 +853,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}

define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psrlw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@@ -904,8 +886,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psrlw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
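
The vXi16 cases above also show why only the bottom 64 bits of the count register matter: the x86 packed shifts (psrlw and friends) consume the whole low quadword, so a 16-bit amount must be zero-extended before use. With SSE4.1 that is a single pmovzxwq; without it, the pslldq/psrldq pair seen above slides the halfword to the top of the register and back, clearing everything above it. A sketch of the vXi16 IR being compiled (illustrative name, same splat form as before):

define <8 x i16> @modulo_splat_lshr_sketch(<8 x i16> %a, <8 x i16> %b) nounwind {
  ; Mask to [0,15], splat lane 0, then shift all lanes uniformly.
  %mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %splat = shufflevector <8 x i16> %mod, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i16> %a, %splat
  ret <8 x i16> %shift
}
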
@@ -580,8 +580,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -595,8 +595,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -622,8 +622,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -642,8 +642,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -657,8 +657,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -684,8 +684,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -823,8 +823,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -838,8 +838,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -865,8 +865,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -886,9 +886,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -896,17 +895,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -914,32 +910,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -947,9 +937,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -961,9 +949,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -972,15 +959,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -989,29 +974,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -1020,7 +1001,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

@@ -133,8 +133,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -200,9 +200,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -214,9 +212,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -225,7 +222,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

@@ -718,19 +718,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi
}

define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: pslld %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pslld %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v4i32:
; AVX: # %bb.0:
@@ -758,9 +750,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslld %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -770,19 +760,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
}

define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psllw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psllw %xmm1, %xmm0
; SSE41-NEXT: retq
; SSE-LABEL: splatvar_modulo_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psllw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
@@ -811,8 +793,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psllw %xmm1, %xmm0
; X86-SSE-NEXT: retl
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

@@ -510,8 +510,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -525,8 +525,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -552,8 +552,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -572,8 +572,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -587,8 +587,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -614,8 +614,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -748,8 +748,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -763,8 +763,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -790,8 +790,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -811,9 +811,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi
define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -821,17 +820,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; AVX2-LABEL: splatvar_modulo_shift_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -839,32 +835,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -872,9 +862,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
;
; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -886,9 +874,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi
define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_modulo_shift_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -897,15 +884,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; AVX2-LABEL: splatvar_modulo_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -914,29 +899,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -945,7 +926,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no
; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

@@ -128,8 +128,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -193,9 +193,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi
define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_modulo_shift_v16i32:
; ALL: # %bb.0:
; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
%mod = and <16 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
@@ -207,9 +205,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no
define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -218,7 +215,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no
; AVX512BW-LABEL: splatvar_modulo_shift_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%mod = and <32 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>

@@ -162,11 +162,11 @@ define <32 x i8> @PR22706(<32 x i1> %x) {
define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z, <8 x i32> %w) {
; AVX1-LABEL: blendv_split:
; AVX1: ## %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpslld %xmm2, %xmm4, %xmm5
; AVX1-NEXT: vpslld %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpslld %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpslld %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm1
@@ -180,8 +180,8 @@ define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32
; AVX2-LABEL: blendv_split:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)
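
blendv_split covers two different uniform amounts feeding one source: each amount is zero-extended separately (the paired vpmovzxdq above), the source is shifted twice, and a per-lane blend picks between the results. A rough sketch of that shape (signature trimmed to the operands that matter; the actual test also takes %z and %w):

define void @blendv_split_sketch(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <8 x i32> %y) nounwind {
  ; Two uniform shift amounts, one source, lane-wise select on the sign
  ; of %cond (which is what vblendvps tests).
  %xsplat = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> zeroinitializer
  %ysplat = shufflevector <8 x i32> %y, <8 x i32> undef, <8 x i32> zeroinitializer
  %sx = shl <8 x i32> %a, %xsplat
  %sy = shl <8 x i32> %a, %ysplat
  %neg = icmp slt <8 x i32> %cond, zeroinitializer
  %sel = select <8 x i1> %neg, <8 x i32> %sx, <8 x i32> %sy
  store <8 x i32> %sel, <8 x i32>* %p
  ret void
}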