forked from OSchip/llvm-project
[X86] In LowerTruncateVecI1, don't add SHL if the input is known to be all sign bits.
If the input is all sign bits then the LSB through MSB are all the same, so we don't need to move the LSB to the MSB. llvm-svn: 321617
This commit is contained in:
parent
c535adcfc5
commit
0d35edda90
|
@ -16523,13 +16523,16 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
|
|||
if (InVT.getScalarSizeInBits() <= 16) {
|
||||
if (Subtarget.hasBWI()) {
|
||||
// legal, will go to VPMOVB2M, VPMOVW2M
|
||||
// Shift packed bytes not supported natively, bitcast to word
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
|
||||
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
|
||||
DAG.getBitcast(ExtVT, In),
|
||||
DAG.getConstant(ShiftInx, DL, ExtVT));
|
||||
ShiftNode = DAG.getBitcast(InVT, ShiftNode);
|
||||
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
|
||||
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
|
||||
// We need to shift to get the lsb into sign position.
|
||||
// Shift packed bytes not supported natively, bitcast to word
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
|
||||
In = DAG.getNode(ISD::SHL, DL, ExtVT,
|
||||
DAG.getBitcast(ExtVT, In),
|
||||
DAG.getConstant(ShiftInx, DL, ExtVT));
|
||||
In = DAG.getBitcast(InVT, In);
|
||||
}
|
||||
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, In);
|
||||
}
|
||||
// Use TESTD/Q, extended vector to packed dword/qword.
|
||||
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
|
||||
|
@ -16542,9 +16545,12 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
|
|||
ShiftInx = InVT.getScalarSizeInBits() - 1;
|
||||
}
|
||||
|
||||
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
|
||||
DAG.getConstant(ShiftInx, DL, InVT));
|
||||
return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
|
||||
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
|
||||
// We need to shift to get the lsb into sign position.
|
||||
In = DAG.getNode(ISD::SHL, DL, InVT, In,
|
||||
DAG.getConstant(ShiftInx, DL, InVT));
|
||||
}
|
||||
return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
|
|
@ -22,7 +22,6 @@ define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -53,7 +52,6 @@ define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -83,7 +81,6 @@ define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x do
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -113,7 +110,6 @@ define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x flo
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm1, (%rsi)
|
||||
|
@ -143,7 +139,6 @@ define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -173,7 +168,6 @@ define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm1, (%rsi)
|
||||
|
@ -203,7 +197,6 @@ define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -233,7 +226,6 @@ define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm1, (%rsi)
|
||||
|
@ -264,7 +256,6 @@ define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
|
||||
|
@ -295,7 +286,6 @@ define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -325,7 +315,6 @@ define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm1, (%rsi)
|
||||
|
@ -358,7 +347,6 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
|
||||
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
|
||||
|
@ -389,7 +377,6 @@ define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -419,7 +406,6 @@ define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm1, (%rsi)
|
||||
|
@ -450,7 +436,6 @@ define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
|
||||
|
@ -481,7 +466,6 @@ define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
|
|||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %zmm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %zmm0, %zmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %zmm1, (%rsi)
|
||||
|
@ -512,7 +496,6 @@ define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovapd %xmm1, (%rsi)
|
||||
|
@ -542,7 +525,6 @@ define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm0, %xmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %xmm1, (%rsi)
|
||||
|
@ -575,7 +557,6 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
|
||||
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
|
||||
|
@ -608,7 +589,6 @@ define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
|
|||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512NOTDQ-NEXT: vpermd %zmm2, %zmm3, %zmm2
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; AVX512NOTDQ-NEXT: vmovaps %zmm0, %zmm1 {%k1}
|
||||
; AVX512NOTDQ-NEXT: vmovaps %zmm1, (%rsi)
|
||||
|
@ -765,7 +745,6 @@ define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -815,7 +794,6 @@ define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -863,7 +841,6 @@ define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -891,7 +868,6 @@ define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -939,7 +915,6 @@ define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -967,7 +942,6 @@ define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1015,7 +989,6 @@ define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1043,7 +1016,6 @@ define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1072,7 +1044,6 @@ define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1121,7 +1092,6 @@ define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1149,7 +1119,6 @@ define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1180,7 +1149,6 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
|
||||
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1229,7 +1197,6 @@ define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1257,7 +1224,6 @@ define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1286,7 +1252,6 @@ define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1315,7 +1280,6 @@ define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
|
|||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovw %k0, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: vzeroupper
|
||||
|
@ -1363,7 +1327,6 @@ define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512NOTDQ-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1391,7 +1354,6 @@ define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1422,7 +1384,6 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
|||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
|
||||
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
|
@ -1453,7 +1414,6 @@ define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
|
|||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512NOTDQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
|
||||
; AVX512NOTDQ-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovw %k0, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: vzeroupper
|
||||
|
|
|
@ -805,7 +805,6 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
|
|||
; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
|
|
|
@ -195,14 +195,12 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
|
|||
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, (%rsp)
|
||||
; KNL-NEXT: movl (%rsp), %eax
|
||||
|
@ -235,28 +233,24 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
|
|||
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, (%rsp)
|
||||
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: movl (%rsp), %ecx
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -31,11 +31,9 @@ define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
|
|||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -161,11 +159,9 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
|
|||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
@ -926,10 +922,8 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
|
|||
; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm2, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
|
|
@ -215,11 +215,9 @@ define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
|
|||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm0
|
||||
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
|
|
@ -27,7 +27,6 @@ define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
|
|||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -130,7 +129,6 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
|
|||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
@ -623,7 +621,6 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
|
|||
; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
|
|
@ -42,7 +42,6 @@ define i16 @v16i16(<16 x i16> %a, <16 x i16> %b) {
|
|||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
@ -194,11 +193,9 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) {
|
|||
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, (%rsp)
|
||||
; AVX512F-NEXT: movl (%rsp), %eax
|
||||
|
|
|
@ -62,14 +62,12 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) {
|
|||
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
|
||||
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, (%rsp)
|
||||
; AVX512F-NEXT: movl (%rsp), %eax
|
||||
|
@ -870,21 +868,17 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8> %b) {
|
|||
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, (%rsp)
|
||||
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movl (%rsp), %ecx
|
||||
|
|
|
@ -8,7 +8,6 @@ define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
|
|||
; AVX512CD: # %bb.0: # %entry
|
||||
; AVX512CD-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
|
@ -45,7 +44,6 @@ define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
|
|||
; AVX512CD: # %bb.0: # %entry
|
||||
; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
|
@ -179,7 +177,6 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
|
|||
; AVX512CD: # %bb.0: # %entry
|
||||
; AVX512CD-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
|
||||
; AVX512CD-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512CD-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512CD-NEXT: kmovw %k0, %eax
|
||||
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
|
||||
|
|
|
@ -5459,38 +5459,30 @@ define <128 x i1> @test_cmp_v128i8(<128 x i8> %a0, <128 x i8> %a1) nounwind {
|
|||
; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4
|
||||
; AVX512F-NEXT: vpslld $31, %zmm4, %zmm4
|
||||
; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 14(%rdi)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
|
||||
; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
|
||||
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 12(%rdi)
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
|
||||
; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
|
||||
; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 10(%rdi)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 8(%rdi)
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 6(%rdi)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 4(%rdi)
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, 2(%rdi)
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, (%rdi)
|
||||
; AVX512F-NEXT: movq %rdi, %rax
|
||||
|
@ -5505,38 +5497,30 @@ define <128 x i1> @test_cmp_v128i8(<128 x i8> %a0, <128 x i8> %a1) nounwind {
|
|||
; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm4, %zmm4
|
||||
; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 14(%rdi)
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
|
||||
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 12(%rdi)
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
|
||||
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 10(%rdi)
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 8(%rdi)
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
||||
; AVX512DQ-NEXT: movq %rdi, %rax
|
||||
|
|
|
@ -16,7 +16,6 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
|
|||
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||
; AVX512VL-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: retq
|
||||
|
@ -51,7 +50,6 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
|
|||
; AVX512VL-NEXT: movq $-1, %rax
|
||||
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: retq
|
||||
|
@ -85,7 +83,6 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
|
|||
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
|
||||
; AVX512VL-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: retq
|
||||
|
@ -110,7 +107,6 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
|
|||
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
|
||||
; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
|
||||
|
@ -125,7 +121,6 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
|
|||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,1,0,3,7,7,0]
|
||||
; AVX512VL-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
|
||||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
|
@ -157,8 +152,7 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
|
|||
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
||||
; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
|
||||
; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -172,8 +166,7 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
|
|||
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
||||
; AVX512VL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
|
||||
; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
|
||||
; AVX512VL-NEXT: vpslld $31, %zmm2, %zmm0
|
||||
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
|
@ -374,8 +367,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
|
|||
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
|
||||
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -389,8 +381,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
|
|||
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
|
||||
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm2, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512VL-NEXT: vptestmd %ymm2, %ymm2, %k0
|
||||
; AVX512VL-NEXT: kmovw %k0, %eax
|
||||
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
|
@ -422,8 +413,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
|
|||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -437,7 +427,6 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
|
|||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
|
||||
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3],ymm1[4,5,6,7]
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512VL-NEXT: kmovw %k0, %eax
|
||||
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -470,8 +459,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
|
|||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1]
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,18446744073709551615,18446744073709551615,0,0]
|
||||
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -484,7 +472,6 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
|
|||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512VL-NEXT: kmovw %k0, %eax
|
||||
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -518,8 +505,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
|
|||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
|
||||
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %al killed %al killed %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
|
@ -534,7 +520,6 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
|
|||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
|
||||
; AVX512VL-NEXT: kmovw %k0, %eax
|
||||
; AVX512VL-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -566,7 +551,6 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
|
|||
; AVX512F-NEXT: kmovw %edi, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
@ -578,7 +562,6 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
|
|||
; AVX512VL-NEXT: kmovw %edi, %k1
|
||||
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512VL-NEXT: kmovw %k0, %eax
|
||||
; AVX512VL-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
|
|
@ -645,9 +645,7 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(<64 x i8>* %ptr) {
|
|||
; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
|
||||
; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
|
||||
; AVX512-NEXT: vpcmpeqb %xmm0, %xmm5, %xmm0
|
||||
; AVX512-NEXT: vpsllw $7, %xmm3, %xmm1
|
||||
; AVX512-NEXT: vpmovb2m %zmm1, %k0
|
||||
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %zmm3, %k0
|
||||
; AVX512-NEXT: vpmovb2m %zmm0, %k1
|
||||
; AVX512-NEXT: kxnorw %k1, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2b %k0, %zmm0
|
||||
|
@ -958,9 +956,7 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) {
|
|||
; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5],ymm1[6,7]
|
||||
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; AVX512-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
|
||||
; AVX512-NEXT: vpsllw $7, %ymm8, %ymm1
|
||||
; AVX512-NEXT: vpmovb2m %zmm1, %k0
|
||||
; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpmovb2m %zmm8, %k0
|
||||
; AVX512-NEXT: vpmovb2m %zmm0, %k1
|
||||
; AVX512-NEXT: kxnord %k1, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2b %k0, %zmm0
|
||||
|
|
Loading…
Reference in New Issue