From 147cfcbef1255ba2b4875b76708dab1a685085f5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 4 Mar 2022 16:47:20 +0000
Subject: [PATCH] [X86] LowerShiftByScalarVariable - find splat patterns with
 getSplatSourceVector instead of getSplatValue

This completes the removal of uses of SelectionDAG::getSplatValue started
in D119090 - by avoiding extracting the splatted element, we make it a lot
easier to zero-extend the bottom 64 bits of the shift amount, and we fix
issues we had on 32-bit targets where i64 isn't legal.

I've removed the old version of getTargetVShiftNode that took a scalar
shift amount argument, and LowerRotate can finally handle vXi16
rotates-by-scalar efficiently (using the same code as general funnel
shifts).

The only regression we see is in the X86-AVX2 PR52719 test case in
vector-shift-ashr-256.ll - this now hits the same problem as the X86-AVX1
case (failure to simplify a multi-use X86ISD::VBROADCAST_LOAD), which I
intend to address in a follow-up patch.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 100 ++--------
 llvm/test/CodeGen/X86/pr15296.ll              |  20 +--
 llvm/test/CodeGen/X86/vector-fshl-128.ll      | 116 ++++++------------
 llvm/test/CodeGen/X86/vector-fshl-256.ll      |  38 ++----
 llvm/test/CodeGen/X86/vector-fshl-512.ll      |  56 ++++-----
 llvm/test/CodeGen/X86/vector-fshl-rot-128.ll  |  93 +++++++-------
 llvm/test/CodeGen/X86/vector-fshl-rot-256.ll  |  99 +++++++--------
 llvm/test/CodeGen/X86/vector-fshl-rot-512.ll  |  78 ++++++------
 llvm/test/CodeGen/X86/vector-fshr-128.ll      | 116 ++++++------------
 llvm/test/CodeGen/X86/vector-fshr-256.ll      |  38 ++----
 llvm/test/CodeGen/X86/vector-fshr-512.ll      |  16 +--
 llvm/test/CodeGen/X86/vector-fshr-rot-128.ll  |  81 ++++++------
 llvm/test/CodeGen/X86/vector-fshr-rot-256.ll  |  83 ++++++-------
 llvm/test/CodeGen/X86/vector-fshr-rot-512.ll  |  62 +++++-----
 llvm/test/CodeGen/X86/vector-rotate-128.ll    |  93 +++++++-------
 llvm/test/CodeGen/X86/vector-rotate-256.ll    | 109 ++++++++--------
 llvm/test/CodeGen/X86/vector-rotate-512.ll    |  78 ++++++------
 .../test/CodeGen/X86/vector-shift-ashr-128.ll |  42 ++-----
 .../test/CodeGen/X86/vector-shift-ashr-256.ll |  57 +++------
 .../test/CodeGen/X86/vector-shift-ashr-512.ll |  10 +-
 .../test/CodeGen/X86/vector-shift-lshr-128.ll |  42 ++-----
 .../test/CodeGen/X86/vector-shift-lshr-256.ll |  60 +++------
 .../test/CodeGen/X86/vector-shift-lshr-512.ll |  10 +-
 llvm/test/CodeGen/X86/vector-shift-shl-128.ll |  42 ++-----
 llvm/test/CodeGen/X86/vector-shift-shl-256.ll |  60 +++------
 llvm/test/CodeGen/X86/vector-shift-shl-512.ll |  10 +-
 llvm/test/CodeGen/X86/vselect-avx.ll          |   6 +-
 27 files changed, 621 insertions(+), 994 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7cdb4f754ba8..12b9195e11e2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25810,72 +25810,6 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
   return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
 }
 
-/// Handle vector element shifts where the shift amount may or may not be a
-/// constant. Takes immediate version of shift as input.
-/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes.
-static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
-                                   SDValue SrcOp, SDValue ShAmt,
-                                   const X86Subtarget &Subtarget,
-                                   SelectionDAG &DAG) {
-  MVT SVT = ShAmt.getSimpleValueType();
-  assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
-
-  // Change opcode to non-immediate version.
-  Opc = getTargetVShiftUniformOpcode(Opc, true);
-
-  // Need to build a vector containing shift amount.
-  // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
-  // +====================+============+=======================================+
-  // | ShAmt is           | HasSSE4.1? | Construct ShAmt vector as             |
-  // +====================+============+=======================================+
-  // | i64                | Yes, No    | Use ShAmt as lowest elt               |
-  // | i32                | Yes        | zero-extend in-reg                    |
-  // | (i32 zext(i16/i8)) | Yes        | zero-extend in-reg                    |
-  // | (i32 zext(i16/i8)) | No         | byte-shift-in-reg                     |
-  // | i16/i32            | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |
-  // +====================+============+=======================================+
-
-  if (SVT == MVT::i64)
-    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
-  else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
-           ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-           (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
-            ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
-    ShAmt = ShAmt.getOperand(0);
-    MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
-    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
-    if (Subtarget.hasSSE41())
-      ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
-                          MVT::v2i64, ShAmt);
-    else {
-      SDValue ByteShift = DAG.getTargetConstant(
-          (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
-      ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
-      ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
-                          ByteShift);
-      ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
-                          ByteShift);
-    }
-  } else if (Subtarget.hasSSE41() &&
-             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
-    ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
-                        MVT::v2i64, ShAmt);
-  } else {
-    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
-                        DAG.getUNDEF(SVT)};
-    ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
-  }
-
-  // The return type has to be a 128-bit type with the same element
-  // type as the input type.
-  MVT EltVT = VT.getVectorElementType();
-  MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
-
-  ShAmt = DAG.getBitcast(ShVT, ShAmt);
-  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
-}
-
 /// Return Mask with the necessary casting or extending
 /// for \p Mask according to \p MaskVT when lowering masking intrinsics
 static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
@@ -29341,22 +29275,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
   unsigned Opcode = Op.getOpcode();
   unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
 
-  // TODO: Use getSplatSourceVector.
-  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
-    if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
-      MVT EltVT = VT.getVectorElementType();
-      assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
-      if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
-        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
-      else if (EltVT.bitsLT(MVT::i32))
-        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
-
-      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
-    }
-  }
-
   int BaseShAmtIdx = -1;
   if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
+    if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))
+      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,
+                                 Subtarget, DAG);
+
     // vXi8 shifts - shift as v8i16 + mask result.
     if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
          (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
@@ -30217,11 +30141,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
   // Attempt to fold as unpack(x,x) << zext(splat(y)):
   // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
   // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
-  // TODO: Handle vXi16 cases on all targets.
-  if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
-      (EltSizeInBits == 16 && !Subtarget.hasSSE41())) {
+  if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
     int BaseRotAmtIdx = -1;
     if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
+      if (EltSizeInBits == 16 && Subtarget.hasSSE41()) {
+        unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
+        return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
+      }
       unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
       SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
       SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
@@ -41560,12 +41486,8 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
   switch (Opc) {
   case X86ISD::VBROADCAST:
   case X86ISD::VBROADCAST_LOAD:
-    // TODO: Permit vXi64 types on 32-bit targets.
- if (isTypeLegal(Op.getValueType().getVectorElementType())) { - UndefElts = APInt::getNullValue(NumElts); - return true; - } - return false; + UndefElts = APInt::getNullValue(NumElts); + return true; } return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts, diff --git a/llvm/test/CodeGen/X86/pr15296.ll b/llvm/test/CodeGen/X86/pr15296.ll index 8673cfdafad5..813c67591ae8 100644 --- a/llvm/test/CodeGen/X86/pr15296.ll +++ b/llvm/test/CodeGen/X86/pr15296.ll @@ -62,11 +62,11 @@ allocas: define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval) nounwind { ; X86-LABEL: shiftInput___64in32bitmode: ; X86: # %bb.0: # %allocas -; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X86-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X86-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1 +; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 +; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-NEXT: retl ; ; X64-LABEL: shiftInput___64in32bitmode: @@ -87,11 +87,11 @@ allocas: define <4 x i64> @shiftInput___2x32bitcast(<4 x i64> %input, i32 %shiftval) nounwind { ; X86-LABEL: shiftInput___2x32bitcast: ; X86: # %bb.0: # %allocas -; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1 +; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0 +; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-NEXT: retl ; ; X64-LABEL: shiftInput___2x32bitcast: diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 4acd6f4a25eb..2d4710d55926 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -1156,60 +1156,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % } define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind { -; SSE2-LABEL: splatvar_funnnel_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] -; SSE2-NEXT: movdqa %xmm2, %xmm4 -; SSE2-NEXT: pandn %xmm3, %xmm4 -; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: psrlw %xmm4, %xmm1 -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psllw %xmm2, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: splatvar_funnnel_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0] +; SSE-NEXT: movdqa %xmm2, %xmm4 +; SSE-NEXT: pandn %xmm3, %xmm4 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: psrlw %xmm4, %xmm1 +; SSE-NEXT: pand %xmm3, %xmm2 +; SSE-NEXT: psllw %xmm2, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: retq ; -; SSE41-LABEL: splatvar_funnnel_v8i16: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa 
{{.*#+}} xmm3 = [15,0,0,0] -; SSE41-NEXT: movdqa %xmm2, %xmm4 -; SSE41-NEXT: pandn %xmm3, %xmm4 -; SSE41-NEXT: psrlw $1, %xmm1 -; SSE41-NEXT: psrlw %xmm4, %xmm1 -; SSE41-NEXT: pand %xmm3, %xmm2 -; SSE41-NEXT: psllw %xmm2, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: splatvar_funnnel_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; AVX1-NEXT: # xmm3 = mem[0,0] -; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_funnnel_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] -; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_funnnel_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 @@ -1220,7 +1192,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 @@ -1231,7 +1203,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 @@ -1252,7 +1224,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 @@ -1267,41 +1239,25 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VLVBMI2-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; -; XOPAVX1-LABEL: splatvar_funnnel_v8i16: -; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; XOPAVX1-NEXT: # xmm3 = mem[0,0] -; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4 -; XOPAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 -; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOPAVX1-NEXT: retq -; -; 
XOPAVX2-LABEL: splatvar_funnnel_v8i16: -; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] -; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOPAVX2-NEXT: vpsrlw $1, %xmm1, %xmm1 -; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOPAVX2-NEXT: retq +; XOP-LABEL: splatvar_funnnel_v8i16: +; XOP: # %bb.0: +; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1 +; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2 +; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0 +; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOP-NEXT: retq ; ; X86-SSE2-LABEL: splatvar_funnnel_v8i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0] ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 ; X86-SSE2-NEXT: pandn %xmm3, %xmm4 -; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] -; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE2-NEXT: psrlw $1, %xmm1 ; X86-SSE2-NEXT: psrlw %xmm4, %xmm1 ; X86-SSE2-NEXT: pand %xmm3, %xmm2 -; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] -; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE2-NEXT: psllw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 9f14cea61cc6..9d14f227f3a8 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -981,13 +981,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; AVX1-NEXT: # xmm3 = mem[0,0] -; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm5 ; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 @@ -1000,52 +999,44 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; 
AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -1061,13 +1052,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -1080,13 +1069,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; XOPAVX1-NEXT: # xmm3 = mem[0,0] -; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; XOPAVX1-NEXT: vpsrlw $1, %xmm5, %xmm5 ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 @@ -1099,13 +1087,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; 
XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 6818d6fe379d..4776f004bef4 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -544,49 +544,47 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4 -; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3 -; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 -; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3 -; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; AVX512F-NEXT: vpsrlw $1, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5 ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 +; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3 +; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4 -; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 -; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm3 -; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; AVX512VL-NEXT: vpsrlw $1, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5 ; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 +; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3 +; 
AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -599,13 +597,11 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index 3bf02ea01038..c2ef8ceaba81 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -925,75 +925,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: psllw %xmm2, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; SSE41-NEXT: psrlw %xmm1, %xmm0 -; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pandn %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: psrlw $1, %xmm4 +; SSE41-NEXT: psrlw %xmm3, %xmm4 +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: psllw %xmm1, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_funnnel_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; 
AVX-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; 
AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index 20660c78ce4f..2be1d69648b8 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -756,79 +756,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5 +; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2 +; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; 
AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index 799abb799b7f..bfea592ecfb8 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -296,60 +296,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4 -; AVX512F-NEXT: vpsllw 
%xmm3, %ymm0, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15] +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15] +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4 +; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; 
AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index aa4d01c859b4..aa490b4966b6 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1249,60 +1249,32 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> % } define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt) nounwind { -; SSE2-LABEL: splatvar_funnnel_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] -; SSE2-NEXT: movdqa %xmm2, %xmm4 -; SSE2-NEXT: pand %xmm3, %xmm4 -; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psrlw %xmm4, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm2 -; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psllw $1, %xmm0 -; SSE2-NEXT: psllw %xmm2, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: splatvar_funnnel_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0] +; SSE-NEXT: movdqa %xmm2, %xmm4 +; SSE-NEXT: pand %xmm3, %xmm4 +; SSE-NEXT: psrlw %xmm4, %xmm1 +; SSE-NEXT: pandn %xmm3, %xmm2 +; SSE-NEXT: psllw $1, %xmm0 +; SSE-NEXT: psllw %xmm2, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: retq ; -; SSE41-LABEL: splatvar_funnnel_v8i16: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0] -; SSE41-NEXT: movdqa %xmm2, %xmm4 -; SSE41-NEXT: pand %xmm3, %xmm4 -; SSE41-NEXT: psrlw %xmm4, %xmm1 -; SSE41-NEXT: pandn %xmm3, %xmm2 -; SSE41-NEXT: psllw $1, %xmm0 -; SSE41-NEXT: psllw %xmm2, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: splatvar_funnnel_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; AVX1-NEXT: # xmm3 = mem[0,0] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: splatvar_funnnel_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] -; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllw $1, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: splatvar_funnnel_v8i16: +; AVX: # %bb.0: +; 
AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 +; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1313,7 +1285,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1324,7 +1296,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1345,7 +1317,7 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 @@ -1361,40 +1333,24 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> % ; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; -; XOPAVX1-LABEL: splatvar_funnnel_v8i16: -; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; XOPAVX1-NEXT: # xmm3 = mem[0,0] -; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4 -; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOPAVX1-NEXT: retq -; -; XOPAVX2-LABEL: splatvar_funnnel_v8i16: -; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] -; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; XOPAVX2-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllw $1, %xmm0, %xmm0 -; XOPAVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm0 -; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; XOPAVX2-NEXT: retq +; XOP-LABEL: splatvar_funnnel_v8i16: +; XOP: # %bb.0: +; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 +; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0 +; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 +; XOP-NEXT: retq ; ; X86-SSE2-LABEL: splatvar_funnnel_v8i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0] ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 ; X86-SSE2-NEXT: pand %xmm3, %xmm4 -; X86-SSE2-NEXT: pslldq {{.*#+}} xmm4 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1] -; X86-SSE2-NEXT: psrldq {{.*#+}} xmm4 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE2-NEXT: psrlw %xmm4, %xmm1 ; X86-SSE2-NEXT: pandn %xmm3, %xmm2 -; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] -; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE2-NEXT: psllw $1, %xmm0 ; X86-SSE2-NEXT: psllw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index e24d1d561c21..32b2d9f835d8 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -1014,12 +1014,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> % define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; AVX1-NEXT: # xmm3 = mem[0,0] -; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3 ; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 @@ -1033,12 +1032,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1046,12 +1043,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1059,12 +1054,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxwq 
{{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1072,12 +1065,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1094,12 +1085,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1114,12 +1103,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,15] -; XOPAVX1-NEXT: # xmm3 = mem[0,0] -; XOPAVX1-NEXT: vandps %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; XOPAVX1-NEXT: vandnps %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; XOPAVX1-NEXT: vpsllw $1, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 @@ -1133,12 +1121,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i ; ; XOPAVX2-LABEL: splatvar_funnnel_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index 176af60d6870..4501fc0c9493 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -546,15 +546,13 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: 
splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 ; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3 ; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3 @@ -566,15 +564,13 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15] ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 ; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3 @@ -586,12 +582,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -606,12 +600,10 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0] ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index a1da14c4c941..566695fd3656 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -957,75 +957,70 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: psrlw %xmm2, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: psrlw %xmm3, %xmm4 +; SSE41-NEXT: pandn %xmm2, %xmm1 +; SSE41-NEXT: psllw $1, %xmm0 ; SSE41-NEXT: psllw %xmm1, %xmm0 -; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_funnnel_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; 
AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index f80ce78b7982..7bab44e9f78d 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -790,79 +790,74 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5 +; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsllw $1, %xmm4, %xmm2 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsrlw 
%xmm3, %ymm0, %ymm3 +; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 
-; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index a322679fe46a..3a533f6bc20f 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -294,60 +294,58 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm3, %ymm2, %ymm4 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5 ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2 ; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm2, %ymm4 +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5 ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; 
AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index 741ce27185e1..f537a9c5429e 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -914,75 +914,70 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; SSE41-LABEL: splatvar_rotate_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: psllw %xmm2, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; SSE41-NEXT: psrlw %xmm1, %xmm0 -; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,0,0,0] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pandn %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: psrlw $1, %xmm4 +; SSE41-NEXT: psrlw %xmm3, %xmm4 +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: psllw %xmm1, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_rotate_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_rotate_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; 
AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_rotate_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_rotate_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_rotate_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index 
3591a891f801..ce452204ebd5 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -577,11 +577,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { ; ; AVX2-LABEL: splatvar_rotate_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64] -; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1 -; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64] +; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1 +; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_rotate_v4i64: @@ -749,79 +749,74 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_rotate_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm5 +; AVX1-NEXT: vpsrlw %xmm3, %xmm5, %xmm5 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpsllw %xmm1, %xmm4, %xmm2 +; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4 +; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_rotate_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX2-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_rotate_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm0, 
%ymm2, %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_rotate_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_rotate_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512BW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $1, %ymm0, %ymm4 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm4, %ymm3 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_rotate_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index cdc4aa7b75b0..4402957b4d74 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -309,60 +309,58 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; 
AVX512F-LABEL: splatvar_rotate_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4 -; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15] +; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_rotate_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15] +; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_rotate_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm4 +; AVX512BW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: 
vpsllw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0] +; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $1, %zmm0, %zmm4 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm4, %zmm3 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_rotate_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index e66d3933333a..821a9803bed7 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -992,19 +992,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi } define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { -; SSE2-LABEL: splatvar_modulo_shift_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: andl $31, %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: psrad %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: splatvar_modulo_shift_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: psrad %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: splatvar_modulo_shift_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: psrad %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: splatvar_modulo_shift_v4i32: ; AVX: # %bb.0: @@ -1032,9 +1024,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi ; ; X86-SSE-LABEL: splatvar_modulo_shift_v4i32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movd %xmm1, %eax -; X86-SSE-NEXT: andl $31, %eax -; X86-SSE-NEXT: movd %eax, %xmm1 +; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE-NEXT: psrad %xmm1, %xmm0 ; X86-SSE-NEXT: retl %mod = and <4 x i32> %b, @@ -1044,19 +1034,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi } define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { -; SSE2-LABEL: splatvar_modulo_shift_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psraw %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: splatvar_modulo_shift_v8i16: -; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: psraw %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: splatvar_modulo_shift_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: psraw %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: splatvar_modulo_shift_v8i16: ; AVX: # %bb.0: @@ -1085,8 
+1067,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi ; X86-SSE-LABEL: splatvar_modulo_shift_v8i16: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE-NEXT: psraw %xmm1, %xmm0 ; X86-SSE-NEXT: retl %mod = and <8 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll index 902547a7b289..53e5fa4e1f69 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -737,8 +737,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -752,8 +752,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; ; XOPAVX1-LABEL: splatvar_shift_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -779,8 +779,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; ; X86-AVX1-LABEL: splatvar_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -799,8 +799,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -814,8 +814,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; ; XOPAVX1-LABEL: splatvar_shift_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -841,8 +841,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; ; X86-AVX1-LABEL: splatvar_shift_v16i16: ; X86-AVX1: # 
%bb.0: -; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1099,9 +1099,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1109,17 +1108,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; AVX2-LABEL: splatvar_modulo_shift_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1127,32 +1123,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v8i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v8i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ 
-1160,9 +1150,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl %mod = and <8 x i32> %b, @@ -1174,9 +1162,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1185,15 +1172,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; AVX2-LABEL: splatvar_modulo_shift_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1202,29 +1187,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v16i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v16i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1233,7 +1214,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; 
X86-AVX2-LABEL: splatvar_modulo_shift_v16i16: ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl %mod = and <16 x i16> %b, @@ -2210,7 +2190,8 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) { ; ; X86-AVX2-LABEL: PR52719: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 +; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648] ; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 ; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll index baba294d648b..b1e6c739fac2 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll @@ -169,8 +169,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_shift_v32i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -245,9 +245,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { ; ALL-LABEL: splatvar_modulo_shift_v16i32: ; ALL: # %bb.0: -; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %mod = and <16 x i32> %b, @@ -259,9 +257,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512DQ-NEXT: vpsraw %xmm1, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -270,7 +267,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no ; AVX512BW-LABEL: splatvar_modulo_shift_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %mod = and <32 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index e17e721ac4fe..6b5a76f2a0e4 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ 
b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -811,19 +811,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi } define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { -; SSE2-LABEL: splatvar_modulo_shift_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: andl $31, %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: psrld %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: splatvar_modulo_shift_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: psrld %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: splatvar_modulo_shift_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: psrld %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: splatvar_modulo_shift_v4i32: ; AVX: # %bb.0: @@ -851,9 +843,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi ; ; X86-SSE-LABEL: splatvar_modulo_shift_v4i32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movd %xmm1, %eax -; X86-SSE-NEXT: andl $31, %eax -; X86-SSE-NEXT: movd %eax, %xmm1 +; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE-NEXT: psrld %xmm1, %xmm0 ; X86-SSE-NEXT: retl %mod = and <4 x i32> %b, @@ -863,19 +853,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi } define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { -; SSE2-LABEL: splatvar_modulo_shift_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psrlw %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: splatvar_modulo_shift_v8i16: -; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: psrlw %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: splatvar_modulo_shift_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: psrlw %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: splatvar_modulo_shift_v8i16: ; AVX: # %bb.0: @@ -904,8 +886,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi ; X86-SSE-LABEL: splatvar_modulo_shift_v8i16: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE-NEXT: psrlw %xmm1, %xmm0 ; X86-SSE-NEXT: retl %mod = and <8 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll index 921be75d4ab8..a36d48431931 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -580,8 +580,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -595,8 +595,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; ; XOPAVX1-LABEL: splatvar_shift_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -622,8 +622,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; ; X86-AVX1-LABEL: splatvar_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -642,8 +642,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -657,8 +657,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; ; XOPAVX1-LABEL: splatvar_shift_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -684,8 +684,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; ; X86-AVX1-LABEL: splatvar_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -823,8 +823,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -838,8 +838,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: 
vpsrlq %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -865,8 +865,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -886,9 +886,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -896,17 +895,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; AVX2-LABEL: splatvar_modulo_shift_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -914,32 +910,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v8i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v8i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vextractf128 
$1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -947,9 +937,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl %mod = and <8 x i32> %b, @@ -961,9 +949,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -972,15 +959,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; AVX2-LABEL: splatvar_modulo_shift_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -989,29 +974,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v16i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v16i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm2, 
%xmm2 ; X86-AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -1020,7 +1001,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16: ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl %mod = and <16 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll index 289201da0259..fa4575dd54e8 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll @@ -133,8 +133,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_shift_v32i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -200,9 +200,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { ; ALL-LABEL: splatvar_modulo_shift_v16i32: ; ALL: # %bb.0: -; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %mod = and <16 x i32> %b, @@ -214,9 +212,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -225,7 +222,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no ; AVX512BW-LABEL: splatvar_modulo_shift_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %mod = and <32 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll index f063bcadd5c7..73e2b29fb3d8 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -718,19 +718,11 @@ define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwi } define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { -; SSE2-LABEL: splatvar_modulo_shift_v4i32: -; SSE2: # %bb.0: -; 
SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: andl $31, %eax -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: pslld %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: splatvar_modulo_shift_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pslld %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: splatvar_modulo_shift_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pslld %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: splatvar_modulo_shift_v4i32: ; AVX: # %bb.0: @@ -758,9 +750,7 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi ; ; X86-SSE-LABEL: splatvar_modulo_shift_v4i32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movd %xmm1, %eax -; X86-SSE-NEXT: andl $31, %eax -; X86-SSE-NEXT: movd %eax, %xmm1 +; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE-NEXT: pslld %xmm1, %xmm0 ; X86-SSE-NEXT: retl %mod = and <4 x i32> %b, @@ -770,19 +760,11 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi } define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { -; SSE2-LABEL: splatvar_modulo_shift_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psllw %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: splatvar_modulo_shift_v8i16: -; SSE41: # %bb.0: -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: psllw %xmm1, %xmm0 -; SSE41-NEXT: retq +; SSE-LABEL: splatvar_modulo_shift_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: psllw %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: splatvar_modulo_shift_v8i16: ; AVX: # %bb.0: @@ -811,8 +793,6 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi ; X86-SSE-LABEL: splatvar_modulo_shift_v8i16: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE-NEXT: psllw %xmm1, %xmm0 ; X86-SSE-NEXT: retl %mod = and <8 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll index 40450b93c88a..cfb4fef06fd8 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -510,8 +510,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -525,8 +525,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; ; XOPAVX1-LABEL: splatvar_shift_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; 
XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -552,8 +552,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; ; X86-AVX1-LABEL: splatvar_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -572,8 +572,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -587,8 +587,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; ; XOPAVX1-LABEL: splatvar_shift_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -614,8 +614,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; ; X86-AVX1-LABEL: splatvar_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -748,8 +748,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -763,8 +763,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -790,8 +790,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: 
vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -811,9 +811,8 @@ define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwi define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -821,17 +820,14 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; AVX2-LABEL: splatvar_modulo_shift_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -839,32 +835,26 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi ; ; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v8i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v8i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -872,9 +862,7 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> 
%b) nounwi ; ; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; X86-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl %mod = and <8 x i32> %b, @@ -886,9 +874,8 @@ define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwi define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX1-LABEL: splatvar_modulo_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -897,15 +884,13 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; AVX2-LABEL: splatvar_modulo_shift_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -914,29 +899,25 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatvar_modulo_shift_v16i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_modulo_shift_v16i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -945,7 +926,6 @@ define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) no ; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16: ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 -; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl %mod = and <16 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll index c65492dcfe04..04b04ed3f1d2 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll @@ -128,8 +128,8 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_shift_v32i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -193,9 +193,7 @@ define <8 x i64> @splatvar_modulo_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwi define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { ; ALL-LABEL: splatvar_modulo_shift_v16i32: ; ALL: # %bb.0: -; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] -; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; ALL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero +; ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; ALL-NEXT: retq %mod = and <16 x i32> %b, @@ -207,9 +205,8 @@ define <16 x i32> @splatvar_modulo_shift_v16i32(<16 x i32> %a, <16 x i32> %b) no define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512DQ-LABEL: splatvar_modulo_shift_v32i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -218,7 +215,6 @@ define <32 x i16> @splatvar_modulo_shift_v32i16(<32 x i16> %a, <32 x i16> %b) no ; AVX512BW-LABEL: splatvar_modulo_shift_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq %mod = and <32 x i16> %b, diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll index 795a52965067..f8072ff00a13 100644 --- a/llvm/test/CodeGen/X86/vselect-avx.ll +++ b/llvm/test/CodeGen/X86/vselect-avx.ll @@ -162,11 +162,11 @@ define <32 x i8> @PR22706(<32 x i1> %x) { define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z, <8 x i32> %w) { ; AVX1-LABEL: blendv_split: ; AVX1: ## %bb.0: -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero ; AVX1-NEXT: vpslld %xmm2, %xmm4, %xmm5 ; AVX1-NEXT: vpslld %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = 
xmm3[0],zero,xmm3[1],zero ; AVX1-NEXT: vpslld %xmm3, %xmm4, %xmm4 ; AVX1-NEXT: vpslld %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm1 @@ -180,8 +180,8 @@ define void @blendv_split(<8 x i32>* %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32 ; AVX2-LABEL: blendv_split: ; AVX2: ## %bb.0: ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero ; AVX2-NEXT: vpslld %xmm2, %ymm1, %ymm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero ; AVX2-NEXT: vpslld %xmm3, %ymm1, %ymm1 ; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 ; AVX2-NEXT: vmovups %ymm0, (%rdi)
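
Note on the splatvar_modulo tests updated above: the shift amount is the bottom element of %b, masked to the element bit width and splatted to every lane. As the check-line changes show, that mask now stays in vector form, so a single pand against the constant-pool mask feeds psrld/psrlw/pslld/psllw directly; the separate zero-extend of the amount (vpmovzxdq/vpmovzxwq) and the scalar movd/andl/movd round-trip on SSE2 and 32-bit targets drop out of the expected output. The remaining churn is scheduling, with vextractf128/vextracti64x4 of the high half now emitted before the amount is masked. A minimal IR sketch of one such test follows; the <i32 31, ...> mask value is implied by the andl $31 in the removed SSE2 sequences, while the %splat name and the use of undef follow the usual shape of these autogenerated tests rather than being copied from this patch:

  define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
    ; Mask each amount to [0,31] so the shift is a modulo shift.
    %mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
    ; Splat the bottom masked element to all lanes (assumed test shape).
    %splat = shufflevector <4 x i32> %mod, <4 x i32> undef, <4 x i32> zeroinitializer
    ; Shift every lane of %a right by the splatted amount; per the checks
    ; above this now lowers to pand + psrld with no extra zero-extend.
    %shift = lshr <4 x i32> %a, %splat
    ret <4 x i32> %shift
  }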