From 8d6a863b02f199960590973f06b4c4dd29df578f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Oct 2019 04:45:02 +0000 Subject: [PATCH] [X86] Add broadcast load folding patterns to the NoVLX compare patterns. These patterns use zmm registers for 128/256-bit compares when the VLX instructions aren't available. Previously we only supported registers, but as PR36191 notes we can fold broadcast loads, but not regular loads. llvm-svn: 373423 --- llvm/lib/Target/X86/X86InstrAVX512.td | 154 +++++- llvm/test/CodeGen/X86/avx512-vec-cmp.ll | 26 +- llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll | 24 +- .../CodeGen/X86/avx512vl-vec-masked-cmp.ll | 480 ++++++------------ llvm/test/CodeGen/X86/combine-bitselect.ll | 6 +- llvm/test/CodeGen/X86/vector-trunc-usat.ll | 6 +- 6 files changed, 318 insertions(+), 378 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 4064d020cc44..2d3b8a556816 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2392,7 +2392,7 @@ multiclass avx512_icmp_cc_rmb opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), cond))), (!cast(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - (CommFrag.OperandTransform $cc))>; + (CommFrag_su.OperandTransform $cc))>; } multiclass avx512_icmp_cc_vl opc, string Suffix, PatFrag Frag, @@ -3172,6 +3172,30 @@ multiclass axv512_icmp_packed_no_vlx_lowering; } +multiclass axv512_icmp_packed_rmb_no_vlx_lowering { + // Broadcast load. + def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)))), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmb") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2), + Narrow.KRC)>; + + def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Frag_su (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2)))), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2), + Narrow.KRC)>; +} + // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. multiclass axv512_icmp_packed_cc_no_vlx_lowering(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), (Frag.OperandTransform $cc)), Narrow.KRC)>; @@ -3189,34 +3213,108 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - (Frag.OperandTransform $cc)), Narrow.KRC)>; + (Frag_su.OperandTransform $cc)), Narrow.KRC)>; +} + +multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering { +// Broadcast load. +def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; } // Same as above, but for fp types which don't use PatFrags. -multiclass axv512_cmp_packed_cc_no_vlx_lowering { -def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS - (!cast(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), timm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), timm:$cc), Narrow.KRC)>; + +// Broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { @@ -3234,6 +3332,18 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; } defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3248,10 +3358,22 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; } let Predicates = [HasBWI, NoVLX] in { diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index 88910fa1749c..0b518ea6a5bc 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -861,8 +861,7 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17] -; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -887,8 +886,7 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* % ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17] -; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -913,9 +911,7 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17] -; AVX512-NEXT: ## xmm2 = mem[0,0] -; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -941,9 +937,7 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* % ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17] -; AVX512-NEXT: ## xmm2 = mem[0,0] -; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -1002,8 +996,7 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -1028,8 +1021,7 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %p ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -1054,8 +1046,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -1081,8 +1072,7 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %p ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll index 5024e67fdb79..55e127e4f418 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -378,8 +378,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -402,8 +401,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -429,8 +427,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -458,8 +455,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -937,8 +933,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -961,8 +956,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -988,8 +982,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -1017,8 +1010,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index f7ee5b219d72..f3f65c545640 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -1191,8 +1191,7 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1222,9 +1221,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1382,8 +1380,7 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1413,9 +1410,8 @@ define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1564,8 +1560,7 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1593,9 +1588,8 @@ define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1743,8 +1737,7 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1772,9 +1765,8 @@ define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1934,8 +1926,7 @@ define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1966,9 +1957,8 @@ define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2119,8 +2109,7 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2149,9 +2138,8 @@ define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2301,8 +2289,7 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2331,9 +2318,8 @@ define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2796,8 +2782,7 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2825,9 +2810,8 @@ define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2984,8 +2968,7 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3015,9 +2998,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3175,8 +3157,7 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3206,9 +3187,8 @@ define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3357,8 +3337,7 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3386,9 +3365,8 @@ define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3536,8 +3514,7 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3565,9 +3542,8 @@ define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3729,8 +3705,7 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3761,9 +3736,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3926,8 +3900,7 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3958,9 +3931,8 @@ define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4114,8 +4086,7 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4144,9 +4115,8 @@ define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4299,8 +4269,7 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4329,9 +4298,8 @@ define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6027,8 +5995,7 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6058,9 +6025,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6218,8 +6184,7 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6249,9 +6214,8 @@ define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6400,8 +6364,7 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6429,9 +6392,8 @@ define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6579,8 +6541,7 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6608,9 +6569,8 @@ define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6770,8 +6730,7 @@ define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6802,9 +6761,8 @@ define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6955,8 +6913,7 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6985,9 +6942,8 @@ define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7137,8 +7093,7 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7167,9 +7122,8 @@ define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7632,8 +7586,7 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7661,9 +7614,8 @@ define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7820,8 +7772,7 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7851,9 +7802,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8011,8 +7961,7 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8042,9 +7991,8 @@ define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8193,8 +8141,7 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8222,9 +8169,8 @@ define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8372,8 +8318,7 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8401,9 +8346,8 @@ define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8565,8 +8509,7 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8597,9 +8540,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8762,8 +8704,7 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8794,9 +8735,8 @@ define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8950,8 +8890,7 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8980,9 +8919,8 @@ define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -9135,8 +9073,7 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -9165,9 +9102,8 @@ define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -10923,8 +10859,7 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -10954,9 +10889,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11114,8 +11048,7 @@ define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11145,9 +11078,8 @@ define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11296,8 +11228,7 @@ define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11325,9 +11256,8 @@ define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11475,8 +11405,7 @@ define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11504,9 +11433,8 @@ define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11666,8 +11594,7 @@ define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11698,9 +11625,8 @@ define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11851,8 +11777,7 @@ define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11881,9 +11806,8 @@ define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12033,8 +11957,7 @@ define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12063,9 +11986,8 @@ define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12528,8 +12450,7 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12557,9 +12478,8 @@ define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12716,8 +12636,7 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12747,9 +12666,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12907,8 +12825,7 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12938,9 +12855,8 @@ define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13089,8 +13005,7 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13118,9 +13033,8 @@ define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13268,8 +13182,7 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13297,9 +13210,8 @@ define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13461,8 +13373,7 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13493,9 +13404,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13658,8 +13568,7 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13690,9 +13599,8 @@ define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13846,8 +13754,7 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13876,9 +13783,8 @@ define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -14031,8 +13937,7 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -14061,9 +13966,8 @@ define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -15839,8 +15743,7 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -15870,9 +15773,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16030,8 +15932,7 @@ define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16061,9 +15962,8 @@ define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16212,8 +16112,7 @@ define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16241,9 +16140,8 @@ define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16391,8 +16289,7 @@ define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16420,9 +16317,8 @@ define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16582,8 +16478,7 @@ define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16614,9 +16509,8 @@ define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16767,8 +16661,7 @@ define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16797,9 +16690,8 @@ define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16949,8 +16841,7 @@ define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16979,9 +16870,8 @@ define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17444,8 +17334,7 @@ define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17473,9 +17362,8 @@ define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17632,8 +17520,7 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17663,9 +17550,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17823,8 +17709,7 @@ define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17854,9 +17739,8 @@ define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18005,8 +17889,7 @@ define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18034,9 +17917,8 @@ define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18184,8 +18066,7 @@ define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18213,9 +18094,8 @@ define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18377,8 +18257,7 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18409,9 +18288,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18574,8 +18452,7 @@ define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18606,9 +18483,8 @@ define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18762,8 +18638,7 @@ define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18792,9 +18667,8 @@ define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18947,8 +18821,7 @@ define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18977,9 +18850,8 @@ define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -19555,8 +19427,7 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %_ ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -19652,8 +19523,7 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -19743,8 +19613,7 @@ define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -19840,8 +19709,7 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -19926,8 +19794,7 @@ define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20017,8 +19884,7 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20102,8 +19968,7 @@ define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20193,8 +20058,7 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20286,8 +20150,7 @@ define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20385,9 +20248,8 @@ define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20475,8 +20337,7 @@ define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20568,9 +20429,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20657,8 +20517,7 @@ define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -20750,9 +20609,8 @@ define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21243,8 +21101,7 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* % ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21334,8 +21191,7 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, < ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21424,8 +21280,7 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* % ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21521,8 +21376,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, < ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21612,8 +21466,7 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21709,8 +21562,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21795,8 +21647,7 @@ define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21886,8 +21737,7 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -21971,8 +21821,7 @@ define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22062,8 +21911,7 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22155,8 +22003,7 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* % ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22255,8 +22102,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22349,8 +22195,7 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22449,8 +22294,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22538,8 +22382,7 @@ define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22632,8 +22475,7 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22720,8 +22562,7 @@ define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -22814,8 +22655,7 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll index ccb969b747f9..b65a7504b24a 100644 --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -608,10 +608,8 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) { ; AVX512F: # %bb.0: # %bb ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12] -; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k1 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15] -; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k2 +; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k1 +; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k2 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k2} ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} ; AVX512F-NEXT: korw %k0, %k1, %k1 diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index fa03c8984ad7..f1c19d91cd32 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -156,8 +156,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_usat_v4i64_v4i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295] -; AVX512F-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729] ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512F-NEXT: vpmovqd %zmm1, %ymm0 @@ -177,8 +176,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_usat_v4i64_v4i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295] -; AVX512BW-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729] ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vpmovqd %zmm1, %ymm0