forked from OSchip/llvm-project
[DAGCombiner][X86] Disable narrowExtractedVectorLoad if the element type size isn't byte sized
The address calculation for the offset assumes that the offset can be computed by multiplying the index by the store size of the element. But that only works if the element's store size is exactly its real size, since we store vectors tightly packed in memory. There are improvements we could make here, such as special-casing the extraction of element 0. I think we could also handle cases where the extracted VT is byte sized and the index is aligned with the extract element count. Differential Revision: https://reviews.llvm.org/D75377
This commit is contained in:
parent
2ac19feb15
commit
0cd6712a7a
|
@ -18661,6 +18661,13 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
|
|||
|
||||
// Allow targets to opt-out.
|
||||
EVT VT = Extract->getValueType(0);
|
||||
|
||||
// Only handle byte sized scalars otherwise the offset is incorrect.
|
||||
// FIXME: We might be able to do better if the VT is byte sized and the index
|
||||
// is aligned.
|
||||
if (!VT.getScalarType().isByteSized())
|
||||
return SDValue();
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
|
||||
return SDValue();
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 4(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $4, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -15,7 +16,8 @@ define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
|
@ -32,7 +34,8 @@ define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub
|
|||
define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 6(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $6, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -42,7 +45,8 @@ define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
|
@ -59,7 +63,8 @@ define void @load_v8i1_broadcast_7_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x doub
|
|||
define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_8_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 8(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -69,7 +74,8 @@ define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x do
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
|
@ -86,7 +92,8 @@ define void @load_v16i1_broadcast_8_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x do
|
|||
define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_8_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 8(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovd2m %xmm2, %k1
|
||||
|
@ -96,7 +103,8 @@ define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x flo
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
|
@ -113,7 +121,8 @@ define void @load_v16i1_broadcast_8_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x flo
|
|||
define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_15_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 14(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -123,7 +132,8 @@ define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
|
@ -140,7 +150,8 @@ define void @load_v16i1_broadcast_15_v2i1(<16 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_15_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 12(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $12, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; AVX512-NEXT: vpmovd2m %xmm2, %k1
|
||||
|
@ -150,7 +161,8 @@ define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
|
@ -167,7 +179,8 @@ define void @load_v16i1_broadcast_15_v4i1(<16 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -177,7 +190,8 @@ define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
|
@ -194,7 +208,8 @@ define void @load_v32i1_broadcast_16_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovd2m %xmm2, %k1
|
||||
|
@ -204,7 +219,8 @@ define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
|
@ -221,7 +237,8 @@ define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v8i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm2
|
||||
; AVX512-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
; AVX512-NEXT: vpmovd2m %ymm2, %k1
|
||||
|
@ -232,7 +249,8 @@ define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
|
@ -250,7 +268,8 @@ define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 30(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $30, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -260,7 +279,8 @@ define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $30, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
|
@ -277,7 +297,8 @@ define void @load_v32i1_broadcast_31_v2i1(<32 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 28(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $28, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; AVX512-NEXT: vpmovd2m %xmm2, %k1
|
||||
|
@ -287,7 +308,8 @@ define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $28, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
|
@ -304,7 +326,8 @@ define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v8i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 24(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $24, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
|
||||
; AVX512-NEXT: vpermd %ymm2, %ymm3, %ymm2
|
||||
|
@ -316,7 +339,8 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
|
||||
|
@ -335,7 +359,8 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -345,7 +370,8 @@ define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm2, %xmm2
|
||||
|
@ -362,7 +388,8 @@ define void @load_v64i1_broadcast_32_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm2
|
||||
; AVX512-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpmovd2m %xmm2, %k1
|
||||
|
@ -372,7 +399,8 @@ define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %xmm2
|
||||
|
@ -389,7 +417,8 @@ define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm2
|
||||
; AVX512-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
; AVX512-NEXT: vpmovd2m %ymm2, %k1
|
||||
|
@ -400,7 +429,8 @@ define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %ymm2
|
||||
|
@ -418,7 +448,8 @@ define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovw 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %zmm2
|
||||
; AVX512-NEXT: vpbroadcastd %xmm2, %zmm2
|
||||
; AVX512-NEXT: vpmovd2m %zmm2, %k1
|
||||
|
@ -429,7 +460,8 @@ define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm2, %zmm2
|
||||
; AVX512NOTDQ-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
|
@ -446,7 +478,8 @@ define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
|
|||
define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 62(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $62, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm2, %k1
|
||||
|
@ -456,7 +489,8 @@ define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $62, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
|
||||
|
@ -473,7 +507,8 @@ define void @load_v64i1_broadcast_63_v2i1(<64 x i1>* %a0,<2 x double> %a1,<2 x d
|
|||
define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x float> %a2,<4 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 60(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $60, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm2
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
; AVX512-NEXT: vpmovd2m %xmm2, %k1
|
||||
|
@ -483,7 +518,8 @@ define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $60, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm2, %xmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,3,3]
|
||||
|
@ -500,7 +536,8 @@ define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
|
|||
define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 56(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $56, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
|
||||
; AVX512-NEXT: vpermd %ymm2, %ymm3, %ymm2
|
||||
|
@ -512,7 +549,8 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
|
||||
|
@ -531,7 +569,8 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
|
|||
define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovw 48(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $48, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %zmm2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512-NEXT: vpermd %zmm2, %zmm3, %zmm2
|
||||
|
@ -543,7 +582,8 @@ define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 48(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $48, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512NOTDQ-NEXT: vpermd %zmm2, %zmm3, %zmm2
|
||||
|
@ -561,13 +601,16 @@ define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
|
|||
define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v2i1_broadcast_1_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 1(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $1, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v2i1_broadcast_1_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 1(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <2 x i1>, <2 x i1>* %a0
|
||||
|
@ -578,13 +621,26 @@ define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 1(%rdi), %k0
|
||||
; AVX512-NEXT: movzbl (%rdi), %eax
|
||||
; AVX512-NEXT: xorl %ecx, %ecx
|
||||
; AVX512-NEXT: btl $1, %eax
|
||||
; AVX512-NEXT: movl $255, %eax
|
||||
; AVX512-NEXT: cmovael %ecx, %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: kshiftrb $1, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 1(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
|
||||
; AVX512NOTDQ-NEXT: xorl %ecx, %ecx
|
||||
; AVX512NOTDQ-NEXT: btl $1, %eax
|
||||
; AVX512NOTDQ-NEXT: movl $255, %eax
|
||||
; AVX512NOTDQ-NEXT: cmovael %ecx, %eax
|
||||
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <3 x i1>, <3 x i1>* %a0
|
||||
|
@ -595,13 +651,26 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v3i1_broadcast_2_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 2(%rdi), %k0
|
||||
; AVX512-NEXT: movzbl (%rdi), %eax
|
||||
; AVX512-NEXT: xorl %ecx, %ecx
|
||||
; AVX512-NEXT: btl $2, %eax
|
||||
; AVX512-NEXT: movl $255, %eax
|
||||
; AVX512-NEXT: cmovael %ecx, %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: kshiftrb $2, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_2_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 2(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
|
||||
; AVX512NOTDQ-NEXT: xorl %ecx, %ecx
|
||||
; AVX512NOTDQ-NEXT: btl $2, %eax
|
||||
; AVX512NOTDQ-NEXT: movl $255, %eax
|
||||
; AVX512NOTDQ-NEXT: cmovael %ecx, %eax
|
||||
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <3 x i1>, <3 x i1>* %a0
|
||||
|
@ -612,13 +681,16 @@ define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v4i1_broadcast_2_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 2(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $2, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_2_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 2(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <4 x i1>, <4 x i1>* %a0
|
||||
|
@ -629,13 +701,16 @@ define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v4i1_broadcast_3_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 3(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $3, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_3_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 3(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <4 x i1>, <4 x i1>* %a0
|
||||
|
@ -646,13 +721,16 @@ define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v8i1_broadcast_4_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 4(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $4, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 4(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <8 x i1>, <8 x i1>* %a0
|
||||
|
@ -663,7 +741,8 @@ define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 4(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $4, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -672,7 +751,8 @@ define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 4(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
|
@ -688,13 +768,16 @@ define void @load_v8i1_broadcast_4_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v8i1_broadcast_7_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 7(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $7, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 7(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <8 x i1>, <8 x i1>* %a0
|
||||
|
@ -705,7 +788,8 @@ define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v8i1_broadcast_7_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 6(%rdi), %k0
|
||||
; AVX512-NEXT: kmovb (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrb $6, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -714,7 +798,8 @@ define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 6(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
|
@ -730,13 +815,16 @@ define void @load_v8i1_broadcast_7_v2i1_store(<8 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_8_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 8(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 8(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <16 x i1>, <16 x i1>* %a0
|
||||
|
@ -747,7 +835,8 @@ define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_8_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 8(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -756,7 +845,8 @@ define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
|
@ -772,7 +862,8 @@ define void @load_v16i1_broadcast_8_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_8_v4i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 8(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovd2m %xmm0, %k0
|
||||
|
@ -781,7 +872,8 @@ define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v4i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 8(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
|
@ -797,13 +889,16 @@ define void @load_v16i1_broadcast_8_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
|||
define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_15_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 15(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 15(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <16 x i1>, <16 x i1>* %a0
|
||||
|
@ -814,7 +909,8 @@ define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_15_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 14(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -823,7 +919,8 @@ define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 14(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
|
@ -839,7 +936,8 @@ define void @load_v16i1_broadcast_15_v2i1_store(<16 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v16i1_broadcast_15_v4i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 12(%rdi), %k0
|
||||
; AVX512-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrw $12, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX512-NEXT: vpmovd2m %xmm0, %k0
|
||||
|
@ -848,7 +946,8 @@ define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v4i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 12(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
|
@ -864,13 +963,16 @@ define void @load_v16i1_broadcast_15_v4i1_store(<16 x i1>* %a0,<4 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 16(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <32 x i1>, <32 x i1>* %a0
|
||||
|
@ -881,7 +983,8 @@ define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -890,7 +993,8 @@ define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
|
@ -906,7 +1010,8 @@ define void @load_v32i1_broadcast_16_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v4i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovd2m %xmm0, %k0
|
||||
|
@ -915,7 +1020,8 @@ define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v4i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
|
@ -931,7 +1037,8 @@ define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_16_v8i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 16(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $16, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm0
|
||||
; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; AVX512-NEXT: vpmovd2m %ymm0, %k0
|
||||
|
@ -941,7 +1048,8 @@ define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 16(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
|
@ -958,13 +1066,16 @@ define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 31(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $31, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 31(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $31, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <32 x i1>, <32 x i1>* %a0
|
||||
|
@ -975,7 +1086,8 @@ define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 30(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $30, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -984,7 +1096,8 @@ define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 30(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $30, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
|
@ -1000,7 +1113,8 @@ define void @load_v32i1_broadcast_31_v2i1_store(<32 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v4i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 28(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $28, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX512-NEXT: vpmovd2m %xmm0, %k0
|
||||
|
@ -1009,7 +1123,8 @@ define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v4i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 28(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $28, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
|
@ -1025,7 +1140,8 @@ define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
|
|||
define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v32i1_broadcast_31_v8i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 24(%rdi), %k0
|
||||
; AVX512-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrd $24, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm0
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
|
||||
; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
|
@ -1036,7 +1152,8 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 24(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
|
||||
|
@ -1054,13 +1171,16 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 32(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <64 x i1>, <64 x i1>* %a0
|
||||
|
@ -1071,7 +1191,8 @@ define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -1080,7 +1201,8 @@ define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
|
@ -1096,7 +1218,8 @@ define void @load_v64i1_broadcast_32_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v4i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovd2m %xmm0, %k0
|
||||
|
@ -1105,7 +1228,8 @@ define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v4i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %xmm0
|
||||
|
@ -1121,7 +1245,8 @@ define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v8i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm0
|
||||
; AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
; AVX512-NEXT: vpmovd2m %ymm0, %k0
|
||||
|
@ -1131,7 +1256,8 @@ define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %ymm0
|
||||
|
@ -1148,7 +1274,8 @@ define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_32_v16i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovw 32(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $32, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovd2m %zmm0, %k0
|
||||
|
@ -1158,7 +1285,8 @@ define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 32(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; AVX512NOTDQ-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
|
@ -1173,13 +1301,16 @@ define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
|
|||
define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v1i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 63(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $63, %k0, %k0
|
||||
; AVX512-NEXT: kmovb %k0, (%rsi)
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v1i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: movb 63(%rdi), %al
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $63, %k0, %k0
|
||||
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
|
||||
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
|
||||
; AVX512NOTDQ-NEXT: retq
|
||||
%d0 = load <64 x i1>, <64 x i1>* %a0
|
||||
|
@ -1190,7 +1321,8 @@ define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v2i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 62(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $62, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2q %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
; AVX512-NEXT: vpmovq2m %xmm0, %k0
|
||||
|
@ -1199,7 +1331,8 @@ define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v2i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 62(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $62, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
||||
|
@ -1215,7 +1348,8 @@ define void @load_v64i1_broadcast_63_v2i1_store(<64 x i1>* %a0,<2 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v4i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 60(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $60, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %xmm0
|
||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
; AVX512-NEXT: vpmovd2m %xmm0, %k0
|
||||
|
@ -1224,7 +1358,8 @@ define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v4i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 60(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $60, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
|
||||
|
@ -1240,7 +1375,8 @@ define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v8i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovb 56(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $56, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %ymm0
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
|
||||
; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
|
@ -1251,7 +1387,8 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 56(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
|
||||
|
@ -1269,7 +1406,8 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
|
|||
define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
|
||||
; AVX512-LABEL: load_v64i1_broadcast_63_v16i1_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: kmovw 48(%rdi), %k0
|
||||
; AVX512-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512-NEXT: kshiftrq $48, %k0, %k0
|
||||
; AVX512-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512-NEXT: vpermd %zmm0, %zmm1, %zmm0
|
||||
|
@ -1280,7 +1418,8 @@ define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
|
|||
;
|
||||
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1_store:
|
||||
; AVX512NOTDQ: # %bb.0:
|
||||
; AVX512NOTDQ-NEXT: kmovw 48(%rdi), %k1
|
||||
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
|
||||
; AVX512NOTDQ-NEXT: kshiftrq $48, %k0, %k1
|
||||
; AVX512NOTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512NOTDQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
|
||||
|
|
|
@ -278,7 +278,10 @@ define i8 @shuf_test1(i16 %v) nounwind {
|
|||
;
|
||||
; X86-LABEL: shuf_test1:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
|
||||
; X86-NEXT: kshiftrw $8, %k0, %k0
|
||||
; X86-NEXT: kmovd %k0, %eax
|
||||
; X86-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; X86-NEXT: retl
|
||||
%v1 = bitcast i16 %v to <16 x i1>
|
||||
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
|
|
@ -7,8 +7,8 @@ declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
|
|||
define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_kunpck_wd:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
|
||||
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
|
@ -29,8 +29,10 @@ declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
|
|||
define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
|
||||
; X86-LABEL: test_int_x86_avx512_kunpck_qd:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
|
||||
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
|
||||
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_kunpck_qd:
|
||||
|
|
Loading…
Reference in New Issue