forked from OSchip/llvm-project
[DAG] SimplifyDemandedVectorElts - zero_extend_vector_inreg(and(x,c)) -> and(x,c')
If only the 0th element is demanded, and it comes from a (one-use) AND, try to convert the zero_extend_vector_inreg into a mask and constant-fold that mask with the AND's constant operand.
This commit is contained in:
parent
964536cf04
commit
5fedbd5b18
|
@ -2809,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
|
|||
if (DemandedElts.isSubsetOf(KnownUndef))
|
||||
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
|
||||
KnownUndef.clearAllBits();
|
||||
|
||||
// zext - if we just need the bottom element then we can mask:
|
||||
// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
|
||||
if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
|
||||
Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
|
||||
Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
|
||||
SDLoc DL(Op);
|
||||
EVT SrcVT = Src.getValueType();
|
||||
EVT SrcSVT = SrcVT.getScalarType();
|
||||
SmallVector<SDValue> MaskElts;
|
||||
MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
|
||||
MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
|
||||
SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
|
||||
if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
|
||||
ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
|
||||
Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
|
||||
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1205,79 +1205,56 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE41-NEXT: pandn %xmm3, %xmm4
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; SSE41-NEXT: psrld $1, %xmm1
|
||||
; SSE41-NEXT: psrld %xmm4, %xmm1
|
||||
; SSE41-NEXT: pand %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE41-NEXT: pslld %xmm2, %xmm0
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
; AVX-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512F-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512F-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512VL-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512BW-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512BW-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
|
@ -1294,13 +1271,11 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512VLBW-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512VLBW-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
|
@ -1311,31 +1286,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
; AVX512VLVBMI2-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatvar_funnnel_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; XOPAVX1-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; XOPAVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; XOPAVX2-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; XOPAVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
; XOP-LABEL: splatvar_funnnel_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; XOP-NEXT: vpsrld $1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; XOP-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; X86-SSE2-LABEL: splatvar_funnnel_v4i32:
|
||||
; X86-SSE2: # %bb.0:
|
||||
|
@ -1375,66 +1335,56 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE41-NEXT: pandn %xmm3, %xmm4
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; SSE41-NEXT: psrlw $1, %xmm1
|
||||
; SSE41-NEXT: psrlw %xmm4, %xmm1
|
||||
; SSE41-NEXT: pand %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm0
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
|
@ -1451,13 +1401,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
|
@ -1470,13 +1418,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; XOP-LABEL: splatvar_funnnel_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
|
@ -1534,45 +1480,43 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v16i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE41-NEXT: pandn %xmm3, %xmm4
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE41-NEXT: pshufb %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [7,0,0,0]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm5
|
||||
; SSE41-NEXT: pandn %xmm4, %xmm5
|
||||
; SSE41-NEXT: psrlw $1, %xmm1
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: psrlw %xmm4, %xmm1
|
||||
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
|
||||
; SSE41-NEXT: psrlw %xmm5, %xmm1
|
||||
; SSE41-NEXT: pcmpeqd %xmm6, %xmm6
|
||||
; SSE41-NEXT: psrlw %xmm4, %xmm6
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm6
|
||||
; SSE41-NEXT: pand %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm5
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm5
|
||||
; SSE41-NEXT: pand %xmm5, %xmm0
|
||||
; SSE41-NEXT: por %xmm6, %xmm0
|
||||
; SSE41-NEXT: pcmpeqd %xmm7, %xmm7
|
||||
; SSE41-NEXT: psrlw %xmm5, %xmm7
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm7 = xmm7[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm7
|
||||
; SSE41-NEXT: pand %xmm4, %xmm2
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm6
|
||||
; SSE41-NEXT: pshufb %xmm3, %xmm6
|
||||
; SSE41-NEXT: pand %xmm6, %xmm0
|
||||
; SSE41-NEXT: por %xmm7, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,0,0,0]
|
||||
; AVX1-NEXT: vpandn %xmm4, %xmm2, %xmm5
|
||||
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpsrlw %xmm5, %xmm6, %xmm5
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsllw %xmm2, %xmm5, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpsllw %xmm2, %xmm6, %xmm2
|
||||
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1580,16 +1524,15 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsllw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX2-NEXT: vpbroadcastb %xmm4, %xmm4
|
||||
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1
|
||||
|
@ -1602,13 +1545,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpslld %xmm4, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
|
@ -1620,13 +1561,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpslld %xmm4, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
|
@ -1638,13 +1577,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
|
@ -1657,13 +1594,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
|
@ -1676,13 +1611,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
|
@ -1694,13 +1627,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $1, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
|
|
|
@ -1252,20 +1252,18 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %xmm4, %xmm2
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
|
@ -1274,20 +1272,18 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512F-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %xmm4, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
|
@ -1296,89 +1292,79 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm4
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm4, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %ymm2, %ymm3, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm2, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm2, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
|
|
|
@ -749,150 +749,138 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
|
|||
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
|
||||
; AVX512F-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vpandn %xmm5, %xmm2, %xmm6
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw %xmm6, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm6, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpand %xmm5, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %xmm3, %xmm2
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vpandq %zmm2, %zmm0, %zmm2
|
||||
; AVX512F-NEXT: vpsrlw %xmm6, %xmm3, %xmm0
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %xmm3, %xmm4
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
|
||||
; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %xmm3, %xmm0
|
||||
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $234, %zmm2, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vpandn %xmm5, %xmm2, %xmm6
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw %xmm6, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm6, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
|
||||
; AVX512VL-NEXT: vpand %xmm5, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
|
||||
; AVX512VL-NEXT: vpsllw %xmm2, %xmm3, %xmm2
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
|
||||
; AVX512VL-NEXT: vpandq %zmm2, %zmm0, %zmm2
|
||||
; AVX512VL-NEXT: vpsrlw %xmm6, %xmm3, %xmm0
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %xmm3, %xmm4
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
|
||||
; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm3, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $234, %zmm2, %zmm1, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm0
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512BW-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: vpsrlw $1, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
|
||||
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw $1, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VBMI2-NEXT: vpsrlw $1, %zmm1, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm0
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VLBW-NEXT: vpsrlw $1, %zmm1, %zmm0
|
||||
; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
|
||||
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $1, %zmm1, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
%splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
|
||||
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
|
||||
|
|
|
@ -1209,15 +1209,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpslld %xmm3, %zmm0, %zmm3
|
||||
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512F-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrld %xmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
|
@ -1226,15 +1224,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpslld %xmm3, %zmm0, %zmm3
|
||||
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
|
@ -1243,15 +1239,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %ymm0, %ymm3
|
||||
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1261,15 +1255,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %ymm0, %ymm3
|
||||
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
|
||||
|
@ -1278,15 +1270,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
|
||||
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1296,15 +1286,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
|
||||
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovwb %ymm0, %xmm0
|
||||
|
|
|
@ -978,15 +978,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -994,15 +992,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3
|
||||
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1010,15 +1006,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3
|
||||
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1026,15 +1020,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm3
|
||||
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
|
|
@ -525,20 +525,20 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
|
|||
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
|
||||
; AVX512F-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm3
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm4
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm3, %ymm4
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
|
||||
; AVX512F-NEXT: vpsubb %ymm3, %ymm6, %ymm3
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm4
|
||||
; AVX512F-NEXT: vpsubb %ymm2, %ymm6, %ymm2
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm4
|
||||
; AVX512F-NEXT: vpsrlw $2, %ymm4, %ymm6
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
|
||||
; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm6
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm8
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm8
|
||||
; AVX512F-NEXT: vpblendvb %ymm8, %ymm6, %ymm4, %ymm4
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm6
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
|
@ -547,42 +547,41 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512F-NEXT: vpblendvb %ymm10, %ymm6, %ymm4, %ymm4
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm6
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm5
|
||||
; AVX512F-NEXT: vpblendvb %ymm3, %ymm5, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm5
|
||||
; AVX512F-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm5
|
||||
; AVX512F-NEXT: vpand %ymm7, %ymm5, %ymm5
|
||||
; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm5
|
||||
; AVX512F-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm5
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm9, %ymm5
|
||||
; AVX512F-NEXT: vpblendvb %ymm10, %ymm5, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
|
||||
; AVX512F-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllw %xmm1, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm3
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm4
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm2
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm3, %ymm4
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6
|
||||
; AVX512VL-NEXT: vpsubb %ymm3, %ymm6, %ymm3
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm4
|
||||
; AVX512VL-NEXT: vpsubb %ymm2, %ymm6, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm4
|
||||
; AVX512VL-NEXT: vpsrlw $2, %ymm4, %ymm6
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
|
||||
; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm6
|
||||
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm8
|
||||
; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm8
|
||||
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm6, %ymm4, %ymm4
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm6
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
|
||||
|
@ -591,31 +590,29 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VL-NEXT: vpblendvb %ymm10, %ymm6, %ymm4, %ymm4
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm6
|
||||
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm5
|
||||
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm5, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw $2, %ymm3, %ymm5
|
||||
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $2, %ymm2, %ymm5
|
||||
; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5
|
||||
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm3, %ymm5
|
||||
; AVX512VL-NEXT: vpblendvb %ymm8, %ymm5, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm2, %ymm5
|
||||
; AVX512VL-NEXT: vpand %ymm5, %ymm9, %ymm5
|
||||
; AVX512VL-NEXT: vpblendvb %ymm10, %ymm5, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3
|
||||
; AVX512VL-NEXT: vpblendvb %ymm10, %ymm5, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpsllw %xmm1, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm4
|
||||
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %xmm5, %xmm3
|
||||
|
@ -624,7 +621,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
|
||||
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
|
@ -634,9 +630,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm4
|
||||
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm5, %xmm3
|
||||
|
@ -645,7 +640,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
|
||||
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
|
@ -655,9 +649,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4
|
||||
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm5, %xmm3
|
||||
|
@ -666,7 +659,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
|
@ -676,9 +668,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4
|
||||
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm5, %xmm3
|
||||
|
@ -687,7 +678,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
|
|
|
@ -1196,52 +1196,33 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE41-NEXT: pand %xmm3, %xmm4
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; SSE41-NEXT: psrld %xmm4, %xmm1
|
||||
; SSE41-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE41-NEXT: pslld $1, %xmm0
|
||||
; SSE41-NEXT: pslld %xmm2, %xmm0
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX1-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX2-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
; AVX-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512F-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512F-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1249,12 +1230,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512VL-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512VL-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1262,12 +1241,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512BW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512BW-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1285,12 +1262,10 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; AVX512VLBW-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; AVX512VLBW-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1303,31 +1278,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
|
|||
; AVX512VLVBMI2-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatvar_funnnel_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; XOPAVX1-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; XOPAVX1-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_funnnel_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [31,31,31,31]
|
||||
; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
|
||||
; XOPAVX2-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
|
||||
; XOPAVX2-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
; XOP-LABEL: splatvar_funnnel_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [31,0,0,0]
|
||||
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; XOP-NEXT: vpsrld %xmm4, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; XOP-NEXT: vpslld $1, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpslld %xmm2, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; X86-SSE2-LABEL: splatvar_funnnel_v4i32:
|
||||
; X86-SSE2: # %bb.0:
|
||||
|
@ -1367,13 +1327,11 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE41-NEXT: pand %xmm3, %xmm4
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm4, %xmm1
|
||||
; SSE41-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; SSE41-NEXT: psllw $1, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm0
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
|
@ -1381,12 +1339,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; AVX-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsllw $1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1394,12 +1350,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1407,12 +1361,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1420,12 +1372,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1443,12 +1393,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1463,12 +1411,10 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %
|
|||
;
|
||||
; XOP-LABEL: splatvar_funnnel_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,0,0,0]
|
||||
; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsllw $1, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1526,43 +1472,41 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v16i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm4
|
||||
; SSE41-NEXT: pand %xmm3, %xmm4
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm4, %xmm1
|
||||
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
|
||||
; SSE41-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE41-NEXT: pshufb %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [7,0,0,0]
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm5
|
||||
; SSE41-NEXT: pand %xmm4, %xmm5
|
||||
; SSE41-NEXT: psrlw %xmm5, %xmm1
|
||||
; SSE41-NEXT: pcmpeqd %xmm6, %xmm6
|
||||
; SSE41-NEXT: psrlw %xmm4, %xmm6
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm6
|
||||
; SSE41-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: pcmpeqd %xmm7, %xmm7
|
||||
; SSE41-NEXT: psrlw %xmm5, %xmm7
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm7 = xmm7[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; SSE41-NEXT: pand %xmm1, %xmm7
|
||||
; SSE41-NEXT: pandn %xmm4, %xmm2
|
||||
; SSE41-NEXT: paddb %xmm0, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm5
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm5
|
||||
; SSE41-NEXT: pand %xmm5, %xmm0
|
||||
; SSE41-NEXT: por %xmm6, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm0
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm6
|
||||
; SSE41-NEXT: pshufb %xmm3, %xmm6
|
||||
; SSE41-NEXT: pand %xmm6, %xmm0
|
||||
; SSE41-NEXT: por %xmm7, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,0,0,0]
|
||||
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm5
|
||||
; AVX1-NEXT: vpsrlw %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
|
||||
; AVX1-NEXT: vpsrlw %xmm5, %xmm6, %xmm5
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpandn %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsllw %xmm2, %xmm5, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpsllw %xmm2, %xmm6, %xmm2
|
||||
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
|
@ -1570,9 +1514,9 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsllw %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
|
@ -1580,7 +1524,6 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
; AVX2-NEXT: vpbroadcastb %xmm4, %xmm4
|
||||
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
|
@ -1591,13 +1534,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrld %xmm4, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpslld %xmm2, %zmm0, %zmm0
|
||||
|
@ -1608,13 +1549,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrld %xmm4, %zmm1, %zmm1
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpslld %xmm2, %zmm0, %zmm0
|
||||
|
@ -1625,13 +1564,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
|
@ -1643,13 +1580,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
|
||||
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
|
@ -1661,13 +1596,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
|
@ -1678,13 +1611,11 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,0,0,0]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
|
||||
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
|
|
|
@ -1241,74 +1241,66 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX2-NEXT: vpsrlw %xmm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX2-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3
|
||||
; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2
|
||||
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512F-NEXT: vpsrlw %xmm3, %xmm4, %xmm3
|
||||
; AVX512F-NEXT: vpsrlw $8, %xmm3, %xmm3
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm4
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm3
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm4, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm3, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
|
@ -1319,12 +1311,10 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
|
@ -1335,12 +1325,10 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
|
@ -1351,12 +1339,10 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm2
|
||||
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
|
|
|
@ -749,140 +749,128 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
|
|||
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
|
||||
; AVX512F-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
|
||||
; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
|
||||
; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
|
||||
; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm4
|
||||
; AVX512F-NEXT: vpsllw %xmm3, %ymm4, %ymm4
|
||||
; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
|
||||
; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw %xmm0, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512F-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
|
||||
; AVX512F-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpsrlw %xmm2, %xmm4, %xmm0
|
||||
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
|
||||
; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
|
||||
; AVX512VL-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
|
||||
; AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm4
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %ymm4, %ymm4
|
||||
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
|
||||
; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm3, %zmm3
|
||||
; AVX512VL-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
|
||||
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm4, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512BW-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
|
||||
; AVX512BW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512BW-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
|
||||
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
|
||||
; AVX512VBMI2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512VLBW-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
|
||||
; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VLBW-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpandn %xmm3, %xmm2, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpaddb %zmm0, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4
|
||||
; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm4
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm3, %xmm2, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3
|
||||
; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3
|
||||
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm1
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm5, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm4, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm4, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: retq
|
||||
%splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
|
||||
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %splat)
|
||||
|
|
|
@ -1292,15 +1292,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512F-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrld %xmm3, %zmm0, %zmm3
|
||||
; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512F-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
|
@ -1309,15 +1307,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VL-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrld %xmm3, %zmm0, %zmm3
|
||||
; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
|
@ -1326,15 +1322,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
|
||||
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1344,15 +1338,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
|
||||
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
|
||||
|
@ -1361,15 +1353,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
|
||||
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1379,15 +1369,13 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
|
||||
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm3, %ymm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovwb %ymm0, %xmm0
|
||||
|
|
|
@ -1072,15 +1072,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
|
||||
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1088,15 +1086,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
|
||||
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1104,15 +1100,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
|
||||
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||
; AVX512VBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
@ -1120,15 +1114,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm3
|
||||
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vporq %zmm0, %zmm3, %zmm0
|
||||
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
|
||||
|
|
|
@ -581,7 +581,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512F-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
|
||||
|
@ -623,7 +622,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VL-NEXT: vpblendvb %ymm9, %ymm5, %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
|
||||
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
|
||||
|
@ -637,9 +635,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
|
||||
; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
|
||||
|
@ -649,7 +646,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpsllw %xmm1, %xmm5, %xmm0
|
||||
; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
|
@ -658,9 +654,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
|
||||
; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
|
||||
|
@ -670,7 +665,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm5, %xmm0
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
|
@ -679,9 +673,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VBMI2: # %bb.0:
|
||||
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
|
||||
; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
|
||||
|
@ -691,7 +684,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VBMI2-NEXT: vpsllw %xmm1, %xmm5, %xmm0
|
||||
; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
|
@ -700,9 +692,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
;
|
||||
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v64i8:
|
||||
; AVX512VLVBMI2: # %bb.0:
|
||||
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [7,7]
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm3
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
|
||||
; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
|
||||
; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
|
||||
|
@ -712,7 +703,6 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
|
|||
; AVX512VLVBMI2-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; AVX512VLVBMI2-NEXT: vpsubb %xmm1, %xmm4, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %zmm0, %zmm2
|
||||
; AVX512VLVBMI2-NEXT: vpsllw %xmm1, %xmm5, %xmm0
|
||||
; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0
|
||||
|
|
|
@ -1005,52 +1005,30 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: psrad %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
; AVX-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
; XOP-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -1079,35 +1057,30 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: psraw %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -1147,7 +1120,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm2
|
||||
|
@ -1162,7 +1134,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
|
||||
|
@ -1177,7 +1148,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
|
||||
|
|
|
@ -824,52 +824,30 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: psrld %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
; AVX-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
; XOP-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -898,35 +876,30 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -962,7 +935,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm2
|
||||
|
@ -973,7 +945,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
|
||||
|
@ -984,7 +955,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
|
||||
|
|
|
@ -238,7 +238,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
|
|||
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
|
||||
|
|
|
@ -729,52 +729,30 @@ define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE41-NEXT: pslld %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
; AVX-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX2-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; XOPAVX2-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
; XOP-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
|
||||
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -803,35 +781,30 @@ define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -866,7 +839,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; SSE41-NEXT: psllw %xmm1, %xmm2
|
||||
|
@ -878,7 +850,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1
|
||||
|
@ -890,7 +861,6 @@ define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwi
|
|||
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
|
||||
|
|
|
@ -231,7 +231,6 @@ define <64 x i8> @splatvar_modulo_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwi
|
|||
; AVX512DQ-LABEL: splatvar_modulo_shift_v64i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
|
||||
|
|
Loading…
Reference in New Issue