[X86] Replace -mcpu option with -mattr in LIT tests added in https://reviews.llvm.org/rL312442

llvm-svn: 312474
This commit is contained in:
Ayman Musa 2017-09-04 09:31:32 +00:00
parent 2661ae48c7
commit 5defce3986
13 changed files with 953 additions and 952 deletions

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
define <4 x double> @test_double_to_4(double %s) {
; CHECK-LABEL: test_double_to_4:
@ -14,7 +14,7 @@ define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %defa
; CHECK-LABEL: test_masked_double_to_4_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -28,7 +28,7 @@ define <4 x double> @test_masked_z_double_to_4_mask0(double %s) {
; CHECK-LABEL: test_masked_z_double_to_4_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -40,7 +40,7 @@ define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %defa
; CHECK-LABEL: test_masked_double_to_4_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -54,7 +54,7 @@ define <4 x double> @test_masked_z_double_to_4_mask1(double %s) {
; CHECK-LABEL: test_masked_z_double_to_4_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -66,7 +66,7 @@ define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %defa
; CHECK-LABEL: test_masked_double_to_4_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -80,7 +80,7 @@ define <4 x double> @test_masked_z_double_to_4_mask2(double %s) {
; CHECK-LABEL: test_masked_z_double_to_4_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -92,7 +92,7 @@ define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %defa
; CHECK-LABEL: test_masked_double_to_4_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -106,7 +106,7 @@ define <4 x double> @test_masked_z_double_to_4_mask3(double %s) {
; CHECK-LABEL: test_masked_z_double_to_4_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -127,7 +127,7 @@ define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %defa
; CHECK-LABEL: test_masked_double_to_8_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -141,7 +141,7 @@ define <8 x double> @test_masked_z_double_to_8_mask0(double %s) {
; CHECK-LABEL: test_masked_z_double_to_8_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -153,7 +153,7 @@ define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %defa
; CHECK-LABEL: test_masked_double_to_8_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $103, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -167,7 +167,7 @@ define <8 x double> @test_masked_z_double_to_8_mask1(double %s) {
; CHECK-LABEL: test_masked_z_double_to_8_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $103, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -179,7 +179,7 @@ define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %defa
; CHECK-LABEL: test_masked_double_to_8_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-56, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -193,7 +193,7 @@ define <8 x double> @test_masked_z_double_to_8_mask2(double %s) {
; CHECK-LABEL: test_masked_z_double_to_8_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-56, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -205,7 +205,7 @@ define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %defa
; CHECK-LABEL: test_masked_double_to_8_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $78, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -219,7 +219,7 @@ define <8 x double> @test_masked_z_double_to_8_mask3(double %s) {
; CHECK-LABEL: test_masked_z_double_to_8_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $78, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x double> undef, double %s, i32 0
@ -240,7 +240,7 @@ define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default)
; CHECK-LABEL: test_masked_float_to_4_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -254,7 +254,7 @@ define <4 x float> @test_masked_z_float_to_4_mask0(float %s) {
; CHECK-LABEL: test_masked_z_float_to_4_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -266,7 +266,7 @@ define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default)
; CHECK-LABEL: test_masked_float_to_4_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -280,7 +280,7 @@ define <4 x float> @test_masked_z_float_to_4_mask1(float %s) {
; CHECK-LABEL: test_masked_z_float_to_4_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -292,7 +292,7 @@ define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default)
; CHECK-LABEL: test_masked_float_to_4_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -306,7 +306,7 @@ define <4 x float> @test_masked_z_float_to_4_mask2(float %s) {
; CHECK-LABEL: test_masked_z_float_to_4_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -318,7 +318,7 @@ define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default)
; CHECK-LABEL: test_masked_float_to_4_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -332,7 +332,7 @@ define <4 x float> @test_masked_z_float_to_4_mask3(float %s) {
; CHECK-LABEL: test_masked_z_float_to_4_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -353,7 +353,7 @@ define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default)
; CHECK-LABEL: test_masked_float_to_8_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $72, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -367,7 +367,7 @@ define <8 x float> @test_masked_z_float_to_8_mask0(float %s) {
; CHECK-LABEL: test_masked_z_float_to_8_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $72, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -379,7 +379,7 @@ define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default)
; CHECK-LABEL: test_masked_float_to_8_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-64, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -393,7 +393,7 @@ define <8 x float> @test_masked_z_float_to_8_mask1(float %s) {
; CHECK-LABEL: test_masked_z_float_to_8_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-64, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -405,7 +405,7 @@ define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default)
; CHECK-LABEL: test_masked_float_to_8_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -419,7 +419,7 @@ define <8 x float> @test_masked_z_float_to_8_mask2(float %s) {
; CHECK-LABEL: test_masked_z_float_to_8_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -431,7 +431,7 @@ define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default)
; CHECK-LABEL: test_masked_float_to_8_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $64, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -445,7 +445,7 @@ define <8 x float> @test_masked_z_float_to_8_mask3(float %s) {
; CHECK-LABEL: test_masked_z_float_to_8_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $64, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -466,7 +466,7 @@ define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %defau
; CHECK-LABEL: test_masked_float_to_16_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -480,7 +480,7 @@ define <16 x float> @test_masked_z_float_to_16_mask0(float %s) {
; CHECK-LABEL: test_masked_z_float_to_16_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -492,7 +492,7 @@ define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %defau
; CHECK-LABEL: test_masked_float_to_16_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -506,7 +506,7 @@ define <16 x float> @test_masked_z_float_to_16_mask1(float %s) {
; CHECK-LABEL: test_masked_z_float_to_16_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -518,7 +518,7 @@ define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %defau
; CHECK-LABEL: test_masked_float_to_16_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -532,7 +532,7 @@ define <16 x float> @test_masked_z_float_to_16_mask2(float %s) {
; CHECK-LABEL: test_masked_z_float_to_16_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -544,7 +544,7 @@ define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %defau
; CHECK-LABEL: test_masked_float_to_16_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -558,7 +558,7 @@ define <16 x float> @test_masked_z_float_to_16_mask3(float %s) {
; CHECK-LABEL: test_masked_z_float_to_16_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = insertelement <2 x float> undef, float %s, i32 0
@ -580,7 +580,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double>
; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -594,7 +594,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -607,7 +607,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double>
; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -621,7 +621,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -634,7 +634,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double>
; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -648,7 +648,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -661,7 +661,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double>
; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -675,7 +675,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -698,7 +698,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double>
; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $120, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -712,7 +712,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $120, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -725,7 +725,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double>
; CHECK-LABEL: test_masked_double_to_8_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $26, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -739,7 +739,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $26, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -752,7 +752,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double>
; CHECK-LABEL: test_masked_double_to_8_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $111, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -766,7 +766,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $111, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -779,7 +779,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double>
; CHECK-LABEL: test_masked_double_to_8_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-100, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -793,7 +793,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p) {
; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-100, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load double, double* %p
@ -816,7 +816,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %def
; CHECK-LABEL: test_masked_float_to_4_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -830,7 +830,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -843,7 +843,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %def
; CHECK-LABEL: test_masked_float_to_4_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -857,7 +857,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -870,7 +870,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %def
; CHECK-LABEL: test_masked_float_to_4_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -884,7 +884,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -897,7 +897,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %def
; CHECK-LABEL: test_masked_float_to_4_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -911,7 +911,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -934,7 +934,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %def
; CHECK-LABEL: test_masked_float_to_8_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $67, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -948,7 +948,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $67, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -961,7 +961,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %def
; CHECK-LABEL: test_masked_float_to_8_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-51, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -975,7 +975,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-51, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -988,7 +988,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %def
; CHECK-LABEL: test_masked_float_to_8_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1002,7 +1002,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1015,7 +1015,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %def
; CHECK-LABEL: test_masked_float_to_8_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1029,7 +1029,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1052,7 +1052,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %
; CHECK-LABEL: test_masked_float_to_16_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-18370, %ax # imm = 0xB83E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1066,7 +1066,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-18370, %ax # imm = 0xB83E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1079,7 +1079,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %
; CHECK-LABEL: test_masked_float_to_16_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $26137, %ax # imm = 0x6619
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1093,7 +1093,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $26137, %ax # imm = 0x6619
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1106,7 +1106,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %
; CHECK-LABEL: test_masked_float_to_16_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1120,7 +1120,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1133,7 +1133,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %
; CHECK-LABEL: test_masked_float_to_16_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%s = load float, float* %p
@ -1147,7 +1147,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p) {
; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%s = load float, float* %p

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
define <16 x i8> @test_i8_to_16(i8 %s) {
; CHECK-LABEL: test_i8_to_16:

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s
define <8 x float> @test_2xfloat_to_8xfloat(<8 x float> %vec) {
; CHECK-LABEL: test_2xfloat_to_8xfloat:
@ -13,7 +13,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -26,7 +26,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -37,7 +37,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -50,7 +50,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -61,7 +61,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-35, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -74,7 +74,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-35, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -85,7 +85,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x
; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -98,7 +98,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -117,7 +117,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <1
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $21312, %ax # imm = 0x5340
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -130,7 +130,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec)
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $21312, %ax # imm = 0x5340
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -141,7 +141,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <1
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -154,7 +154,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec)
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -165,7 +165,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <1
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -178,7 +178,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec)
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -189,7 +189,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <1
; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -202,7 +202,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec)
; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -222,7 +222,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -235,7 +235,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -247,7 +247,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -260,7 +260,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -272,7 +272,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -285,7 +285,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -297,7 +297,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -310,7 +310,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -331,7 +331,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $21, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -344,7 +344,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $21, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -356,7 +356,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $82, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -369,7 +369,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $82, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -381,7 +381,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -394,7 +394,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -406,7 +406,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %v
; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-19, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -419,7 +419,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>*
; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-19, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -440,7 +440,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %v
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $28, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -453,7 +453,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>*
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $28, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -465,7 +465,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %v
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-115, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -478,7 +478,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>*
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-115, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -490,7 +490,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %v
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-76, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -503,7 +503,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>*
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-76, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -515,7 +515,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %v
; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -528,7 +528,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>*
; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -551,7 +551,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movb $-49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -565,7 +565,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp)
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movb $-49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -578,7 +578,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movb $-118, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -592,7 +592,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp)
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movb $-118, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -605,7 +605,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movb $-11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -619,7 +619,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp)
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movb $-11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -632,7 +632,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movb $-102, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -646,7 +646,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp)
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movb $-102, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -669,7 +669,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp,
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -683,7 +683,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %v
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -696,7 +696,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp,
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -710,7 +710,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %v
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -723,7 +723,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp,
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -737,7 +737,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %v
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -750,7 +750,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp,
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movw $25225, %ax # imm = 0x6289
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -764,7 +764,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %v
; CHECK: # BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movw $25225, %ax # imm = 0x6289
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x float>, <2 x float>* %vp
@ -785,7 +785,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <
; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-109, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -798,7 +798,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp)
; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-109, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -810,7 +810,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <
; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $74, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -823,7 +823,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp)
; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $74, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -835,7 +835,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <
; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -848,7 +848,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp)
; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -860,7 +860,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <
; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $48, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -873,7 +873,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp)
; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $48, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -894,7 +894,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp,
; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-25378, %ax # imm = 0x9CDE
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -907,7 +907,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %v
; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-25378, %ax # imm = 0x9CDE
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -919,7 +919,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp,
; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-22502, %ax # imm = 0xA81A
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -932,7 +932,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %v
; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-22502, %ax # imm = 0xA81A
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -944,7 +944,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp,
; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -957,7 +957,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %v
; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -969,7 +969,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp,
; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -982,7 +982,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %v
; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -1003,7 +1003,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp,
; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1016,7 +1016,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %v
; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1028,7 +1028,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp,
; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1041,7 +1041,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %v
; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1053,7 +1053,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp,
; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1066,7 +1066,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %v
; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1078,7 +1078,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp,
; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1091,7 +1091,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %v
; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s
; FIXME: fixing PR34394 should fix the i32x2 memory cases resulting in a simple vbroadcasti32x2 instruction.
@ -15,7 +15,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
@ -28,7 +28,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask0(<4 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@ -39,7 +39,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
@ -52,7 +52,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask1(<4 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@ -63,7 +63,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
@ -76,7 +76,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask2(<4 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@ -87,7 +87,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
@ -100,7 +100,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask3(<4 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@ -119,7 +119,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $92, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@ -132,7 +132,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask0(<8 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $92, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -143,7 +143,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-15, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@ -156,7 +156,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask1(<8 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-15, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -167,7 +167,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-95, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@ -180,7 +180,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask2(<8 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-95, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -191,7 +191,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %de
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
@ -204,7 +204,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask3(<8 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -223,7 +223,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32>
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-18638, %ax # imm = 0xB732
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@ -236,7 +236,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask0(<16 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-18638, %ax # imm = 0xB732
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -247,7 +247,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32>
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $25429, %ax # imm = 0x6355
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@ -260,7 +260,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask1(<16 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $25429, %ax # imm = 0x6355
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -271,7 +271,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32>
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $27159, %ax # imm = 0x6A17
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@ -284,7 +284,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask2(<16 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $27159, %ax # imm = 0x6A17
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -295,7 +295,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32>
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-22884, %ax # imm = 0xA69C
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
@ -308,7 +308,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-22884, %ax # imm = 0xA69C
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@ -330,7 +330,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp, <4 x i32>
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -344,7 +344,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -357,7 +357,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp, <4 x i32>
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -371,7 +371,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -384,7 +384,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp, <4 x i32>
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -398,7 +398,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -411,7 +411,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp, <4 x i32>
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -425,7 +425,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -450,7 +450,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp, <8 x i32>
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: movb $-94, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -465,7 +465,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movb $-94, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -479,7 +479,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp, <8 x i32>
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: movb $97, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -494,7 +494,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movb $97, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -508,7 +508,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp, <8 x i32>
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: movb $-33, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -523,7 +523,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movb $-33, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -537,7 +537,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp, <8 x i32>
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: movb $-111, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -552,7 +552,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movb $-111, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -577,7 +577,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -592,7 +592,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -606,7 +606,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -621,7 +621,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -635,7 +635,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $-19542, %ax # imm = 0xB3AA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -650,7 +650,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $-19542, %ax # imm = 0xB3AA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -664,7 +664,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $27409, %ax # imm = 0x6B11
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -679,7 +679,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp) {
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2]
; CHECK-NEXT: movw $27409, %ax # imm = 0x6B11
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@ -700,7 +700,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask0(<4 x i32>* %vp, <8 x i32>
; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-87, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -713,7 +713,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask0(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-87, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -725,7 +725,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask1(<4 x i32>* %vp, <8 x i32>
; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -738,7 +738,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask1(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -750,7 +750,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask2(<4 x i32>* %vp, <8 x i32>
; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -763,7 +763,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask2(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -775,7 +775,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask3(<4 x i32>* %vp, <8 x i32>
; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $66, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -788,7 +788,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask3(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $66, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -809,7 +809,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask0(<4 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $10334, %ax # imm = 0x285E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -822,7 +822,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask0(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $10334, %ax # imm = 0x285E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -834,7 +834,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask1(<4 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-30962, %ax # imm = 0x870E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -847,7 +847,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask1(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-30962, %ax # imm = 0x870E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -859,7 +859,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask2(<4 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $31933, %ax # imm = 0x7CBD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -872,7 +872,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask2(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $31933, %ax # imm = 0x7CBD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -884,7 +884,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask3(<4 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28744, %ax # imm = 0x8FB8
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -897,7 +897,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask3(<4 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28744, %ax # imm = 0x8FB8
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i32>, <4 x i32>* %vp
@ -918,7 +918,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask0(<2 x i64>* %vp, <4 x i64>
; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -931,7 +931,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask0(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -943,7 +943,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask1(<2 x i64>* %vp, <4 x i64>
; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -956,7 +956,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask1(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -968,7 +968,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask2(<2 x i64>* %vp, <4 x i64>
; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -981,7 +981,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask2(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -993,7 +993,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask3(<2 x i64>* %vp, <4 x i64>
; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1006,7 +1006,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask3(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1027,7 +1027,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask0(<2 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $119, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1040,7 +1040,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask0(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $119, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1052,7 +1052,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask1(<2 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-50, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1065,7 +1065,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask1(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-50, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1077,7 +1077,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask2(<2 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-33, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1090,7 +1090,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask2(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-33, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1102,7 +1102,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask3(<2 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1115,7 +1115,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask3(<2 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT: retq
%vec = load <2 x i64>, <2 x i64>* %vp
@ -1136,7 +1136,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask0(<8 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $12321, %ax # imm = 0x3021
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1149,7 +1149,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask0(<8 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $12321, %ax # imm = 0x3021
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1161,7 +1161,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask1(<8 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-39, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1174,7 +1174,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask1(<8 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-39, %ax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1186,7 +1186,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask2(<8 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-24047, %ax # imm = 0xA211
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1199,7 +1199,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask2(<8 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-24047, %ax # imm = 0xA211
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1211,7 +1211,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask3(<8 x i32>* %vp, <16 x i
; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5470, %ax # imm = 0x155E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1224,7 +1224,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask3(<8 x i32>* %vp) {
; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5470, %ax # imm = 0x155E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x i32>, <8 x i32>* %vp
@ -1245,7 +1245,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask0(<4 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-71, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1258,7 +1258,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask0(<4 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-71, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1270,7 +1270,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask1(<4 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1283,7 +1283,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask1(<4 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1295,7 +1295,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask2(<4 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $103, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1308,7 +1308,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask2(<4 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $103, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1320,7 +1320,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask3(<4 x i64>* %vp, <8 x i64>
; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-83, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -1333,7 +1333,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask3(<4 x i64>* %vp) {
; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-83, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
define <4 x float> @test_4xfloat_dup_high(<4 x float> %vec) {
; CHECK-LABEL: test_4xfloat_dup_high:
@ -13,7 +13,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -26,7 +26,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@ -37,7 +37,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -50,7 +50,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@ -61,7 +61,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -74,7 +74,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@ -85,7 +85,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -98,7 +98,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@ -109,7 +109,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_dup_high_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -122,7 +122,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask4(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@ -142,7 +142,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -155,7 +155,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -167,7 +167,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -180,7 +180,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -192,7 +192,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -205,7 +205,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -217,7 +217,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -230,7 +230,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -242,7 +242,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4
; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -255,7 +255,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask4(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -275,7 +275,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-106, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -288,7 +288,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-106, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@ -299,7 +299,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -312,7 +312,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@ -323,7 +323,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-104, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -336,7 +336,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-104, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@ -347,7 +347,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -360,7 +360,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@ -371,7 +371,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_dup_high_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-109, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -384,7 +384,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask4(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-109, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@ -404,7 +404,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $74, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -417,7 +417,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $74, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -429,7 +429,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -442,7 +442,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -454,7 +454,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $48, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -467,7 +467,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $48, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -479,7 +479,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-100, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -492,7 +492,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-100, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -504,7 +504,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-89, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -517,7 +517,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask4(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-89, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -537,7 +537,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -550,7 +550,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
@ -561,7 +561,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -574,7 +574,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
@ -585,7 +585,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -598,7 +598,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
@ -609,7 +609,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -622,7 +622,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
@ -633,7 +633,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_dup_high_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -646,7 +646,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask4(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
@ -666,7 +666,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -679,7 +679,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -691,7 +691,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -704,7 +704,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -716,7 +716,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -729,7 +729,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -741,7 +741,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -754,7 +754,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -766,7 +766,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $25644, %ax # imm = 0x642C
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -779,7 +779,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask4(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $25644, %ax # imm = 0x642C
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
define <2 x double> @test_2xdouble_dup_low(<2 x double> %vec) {
; CHECK-LABEL: test_2xdouble_dup_low:
@ -13,7 +13,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x
; CHECK-LABEL: test_masked_2xdouble_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
@ -26,7 +26,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec) {
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT: retq
%shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
@ -37,7 +37,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x
; CHECK-LABEL: test_masked_2xdouble_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
@ -50,7 +50,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mask1(<2 x double> %vec) {
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT: retq
%shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
@ -70,7 +70,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, <
; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -83,7 +83,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(<2 x double>* %vp)
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -95,7 +95,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, <
; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -108,7 +108,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask1(<2 x double>* %vp)
; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -128,7 +128,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -141,7 +141,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -152,7 +152,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -165,7 +165,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -176,7 +176,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -189,7 +189,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -200,7 +200,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -213,7 +213,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -224,7 +224,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x
; CHECK-LABEL: test_masked_4xdouble_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -237,7 +237,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask4(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -257,7 +257,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, <
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $9, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -270,7 +270,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(<4 x double>* %vp)
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $9, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -282,7 +282,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, <
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -295,7 +295,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(<4 x double>* %vp)
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -307,7 +307,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, <
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -320,7 +320,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(<4 x double>* %vp)
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -332,7 +332,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, <
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -345,7 +345,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(<4 x double>* %vp)
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -357,7 +357,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, <
; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -370,7 +370,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask4(<4 x double>* %vp)
; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -390,7 +390,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -403,7 +403,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -414,7 +414,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $64, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -427,7 +427,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $64, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -438,7 +438,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-24, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -451,7 +451,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-24, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -462,7 +462,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -475,7 +475,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -486,7 +486,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x
; CHECK-LABEL: test_masked_8xdouble_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-50, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -499,7 +499,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask4(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-50, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -519,7 +519,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, <
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-26, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -532,7 +532,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(<8 x double>* %vp)
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-26, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -544,7 +544,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, <
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $79, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -557,7 +557,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(<8 x double>* %vp)
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $79, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -569,7 +569,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, <
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-70, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -582,7 +582,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(<8 x double>* %vp)
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-70, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -594,7 +594,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, <
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-27, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -607,7 +607,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(<8 x double>* %vp)
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-27, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -619,7 +619,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, <
; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-82, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -632,7 +632,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask4(<8 x double>* %vp)
; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-82, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -652,7 +652,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x flo
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -665,7 +665,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -676,7 +676,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x flo
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -689,7 +689,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -700,7 +700,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x flo
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -713,7 +713,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -724,7 +724,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x flo
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -737,7 +737,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -748,7 +748,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x flo
; CHECK-LABEL: test_masked_4xfloat_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -761,7 +761,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask4(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@ -781,7 +781,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -794,7 +794,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -806,7 +806,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -819,7 +819,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -831,7 +831,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -844,7 +844,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -856,7 +856,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -869,7 +869,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -881,7 +881,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x
; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $9, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -894,7 +894,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask4(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $9, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -914,7 +914,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x flo
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -927,7 +927,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -938,7 +938,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x flo
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -951,7 +951,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -962,7 +962,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x flo
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-73, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -975,7 +975,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-73, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -986,7 +986,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x flo
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $102, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -999,7 +999,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $102, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -1010,7 +1010,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x flo
; CHECK-LABEL: test_masked_8xfloat_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-46, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1023,7 +1023,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask4(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-46, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@ -1043,7 +1043,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-86, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1056,7 +1056,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-86, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1068,7 +1068,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1081,7 +1081,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1093,7 +1093,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1106,7 +1106,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1118,7 +1118,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-35, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1131,7 +1131,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-35, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1143,7 +1143,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x
; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1156,7 +1156,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask4(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -1176,7 +1176,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $21312, %ax # imm = 0x5340
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1189,7 +1189,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $21312, %ax # imm = 0x5340
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
@ -1200,7 +1200,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1213,7 +1213,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
@ -1224,7 +1224,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1237,7 +1237,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
@ -1248,7 +1248,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1261,7 +1261,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
@ -1272,7 +1272,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x
; CHECK-LABEL: test_masked_16xfloat_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $15638, %ax # imm = 0x3D16
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1285,7 +1285,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask4(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $15638, %ax # imm = 0x3D16
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
@ -1305,7 +1305,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, <
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-2129, %ax # imm = 0xF7AF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1318,7 +1318,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(<16 x float>* %vp)
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-2129, %ax # imm = 0xF7AF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1330,7 +1330,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, <
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-12900, %ax # imm = 0xCD9C
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1343,7 +1343,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(<16 x float>* %vp)
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-12900, %ax # imm = 0xCD9C
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1355,7 +1355,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, <
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $29358, %ax # imm = 0x72AE
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1368,7 +1368,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(<16 x float>* %vp)
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $29358, %ax # imm = 0x72AE
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1380,7 +1380,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, <
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5272, %ax # imm = 0x1498
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1393,7 +1393,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(<16 x float>* %vp)
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $5272, %ax # imm = 0x1498
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1405,7 +1405,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, <
; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $20975, %ax # imm = 0x51EF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1418,7 +1418,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask4(<16 x float>* %vp)
; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movw $20975, %ax # imm = 0x51EF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
; FIXME: The non immediate <16 x float> test cases should be fixed by PR34382
@ -15,7 +15,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float>
; CHECK-LABEL: test_masked_4xfloat_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[2,1,3,1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -28,7 +28,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,3,1]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 1>
@ -39,7 +39,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float>
; CHECK-LABEL: test_masked_4xfloat_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -52,7 +52,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
@ -63,7 +63,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float>
; CHECK-LABEL: test_masked_4xfloat_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,3,2,1]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -76,7 +76,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,2,1]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 1>
@ -95,7 +95,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float>
; CHECK-LABEL: test_masked_4xfloat_perm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -108,7 +108,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask3(<4 x float> %vec) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
@ -128,7 +128,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,3,1,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -141,7 +141,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,3,1,3]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -154,7 +154,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[1,3,2,0]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -167,7 +167,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[1,3,2,0]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -180,7 +180,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[2,1,3,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -193,7 +193,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask2(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -215,7 +215,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x fl
; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[0,1,3,0]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -228,7 +228,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask3(<4 x float>* %vp) {
; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,0]
; CHECK-NEXT: retq
%vec = load <4 x float>, <4 x float>* %vp
@ -249,7 +249,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float>
; CHECK-LABEL: test_masked_8xfloat_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $83, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,6,6,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -262,7 +262,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $83, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,6,6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 6, i32 6>
@ -273,7 +273,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-34, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,7,6,7,6]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -286,7 +286,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-34, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,7,6,7,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 7, i32 6, i32 7, i32 6>
@ -297,7 +297,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float>
; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,2,1,6,5,4,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -310,7 +310,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $49, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,2,1,6,5,4,4]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 2, i32 1, i32 6, i32 5, i32 4, i32 4>
@ -329,7 +329,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-111, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,2,1,0,6,6,5,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -342,7 +342,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-111, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,1,0,6,6,5,4]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 2, i32 1, i32 0, i32 6, i32 6, i32 5, i32 4>
@ -353,7 +353,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float>
; CHECK-LABEL: test_masked_8xfloat_perm_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $61, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,3,7,7,6,5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -366,7 +366,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask4:
; CHECK: # BB#0:
; CHECK-NEXT: movb $61, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,3,7,7,6,5]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 6, i32 5>
@ -377,7 +377,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3,6,5,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -390,7 +390,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask5(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3,6,5,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 3, i32 6, i32 5, i32 7, i32 7>
@ -409,7 +409,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float>
; CHECK-LABEL: test_masked_8xfloat_perm_mask6:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-51, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,5,6,7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -422,7 +422,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_mask6:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-51, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,5,6,7,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 5, i32 6, i32 7, i32 7>
@ -433,7 +433,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x fl
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,0,2,1,7,4,6,5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -446,7 +446,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask7(<8 x float> %vec) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,2,1,7,4,6,5]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 4, i32 6, i32 5>
@ -468,7 +468,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: movb $-95, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[3,0,0,2,4,6,7,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -482,7 +482,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: movb $-95, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,2,4,6,7,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -495,7 +495,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-41, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,2,2,6,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -508,7 +508,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-41, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,2,2,6,4,6,6]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -522,7 +522,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[2,1,1,3,4,4,7,4]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -536,7 +536,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,1,3,4,4,7,4]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -558,7 +558,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-70, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,0,3,3,4,4,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -571,7 +571,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-70, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,0,3,3,4,4,7,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -585,7 +585,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: movb $30, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[0,1,0,1,4,6,5,4]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -599,7 +599,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask4(<8 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: movb $30, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,0,1,4,6,5,4]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -612,7 +612,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movb $56, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,0,3,6,4,4,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -625,7 +625,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movb $56, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,0,3,6,4,4,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -649,7 +649,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: movb $-54, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[0,1,2,3,7,4,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -663,7 +663,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask6(<8 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: movb $-54, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,6,7]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -676,7 +676,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8
; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movb $85, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,2,3,1,4,6,7,5]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -689,7 +689,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp) {
; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movb $85, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,1,4,6,7,5]
; CHECK-NEXT: retq
%vec = load <8 x float>, <8 x float>* %vp
@ -712,7 +712,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: movw $16429, %ax # imm = 0x402D
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -726,7 +726,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: movw $16429, %ax # imm = 0x402D
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
@ -737,7 +737,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-26425, %ax # imm = 0x98C7
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -750,7 +750,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-26425, %ax # imm = 0x98C7
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 1, i32 6, i32 6, i32 6, i32 5, i32 10, i32 10, i32 10, i32 9, i32 14, i32 14, i32 14, i32 13>
@ -762,7 +762,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -776,7 +776,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 0, i32 5, i32 4, i32 6, i32 5, i32 11, i32 10, i32 9, i32 9, i32 14, i32 13, i32 14, i32 12>
@ -795,7 +795,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -808,7 +808,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 2, i32 5, i32 5, i32 4, i32 6, i32 9, i32 9, i32 8, i32 10, i32 13, i32 13, i32 12, i32 14>
@ -820,7 +820,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -834,7 +834,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 3, i32 3, i32 5, i32 5, i32 5, i32 7, i32 11, i32 11, i32 8, i32 11, i32 14, i32 12, i32 14, i32 15>
@ -845,7 +845,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -858,7 +858,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 1, i32 0, i32 5, i32 6, i32 5, i32 4, i32 9, i32 10, i32 9, i32 8, i32 13, i32 14, i32 13, i32 12>
@ -879,7 +879,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x fl
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: movw $-28239, %ax # imm = 0x91B1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -893,7 +893,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: movw $-28239, %ax # imm = 0x91B1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
@ -904,7 +904,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-32205, %ax # imm = 0x8233
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -917,7 +917,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-32205, %ax # imm = 0x8233
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 0, i32 2, i32 7, i32 7, i32 4, i32 6, i32 11, i32 11, i32 8, i32 10, i32 15, i32 15, i32 12, i32 14>
@ -939,7 +939,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: movw $-22887, %ax # imm = 0xA699
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -953,7 +953,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: movw $-22887, %ax # imm = 0xA699
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -966,7 +966,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $22744, %ax # imm = 0x58D8
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -979,7 +979,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $22744, %ax # imm = 0x58D8
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -993,7 +993,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
; CHECK-NEXT: movw $-8399, %ax # imm = 0xDF31
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1007,7 +1007,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
; CHECK-NEXT: movw $-8399, %ax # imm = 0xDF31
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1029,7 +1029,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $18246, %ax # imm = 0x4746
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1042,7 +1042,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $18246, %ax # imm = 0x4746
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1056,7 +1056,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
; CHECK-NEXT: movw $1218, %ax # imm = 0x4C2
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1070,7 +1070,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask4(<16 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
; CHECK-NEXT: movw $1218, %ax # imm = 0x4C2
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1083,7 +1083,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movw $2665, %ax # imm = 0xA69
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1096,7 +1096,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask5:
; CHECK: # BB#0:
; CHECK-NEXT: movw $2665, %ax # imm = 0xA69
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1120,7 +1120,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: movw $-20907, %ax # imm = 0xAE55
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1134,7 +1134,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask6(<16 x float>* %vp) {
; CHECK: # BB#0:
; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: movw $-20907, %ax # imm = 0xAE55
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1147,7 +1147,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp,
; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28944, %ax # imm = 0x8EF0
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1160,7 +1160,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp
; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask7:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-28944, %ax # imm = 0x8EF0
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
@ -1181,7 +1181,7 @@ define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x dou
; CHECK-LABEL: test_masked_2xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
@ -1194,7 +1194,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT: retq
%shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
@ -1205,7 +1205,7 @@ define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x dou
; CHECK-LABEL: test_masked_2xdouble_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
@ -1218,7 +1218,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mask1(<2 x double> %vec) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT: retq
%shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
@ -1238,7 +1238,7 @@ define <2 x double> @test_masked_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x
; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -1251,7 +1251,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mem_mask0(<2 x double>* %vp) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -1264,7 +1264,7 @@ define <2 x double> @test_masked_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x
; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -1277,7 +1277,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mem_mask1(<2 x double>* %vp) {
; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0]
; CHECK-NEXT: retq
%vec = load <2 x double>, <2 x double>* %vp
@ -1298,7 +1298,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x dou
; CHECK-LABEL: test_masked_4xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,0,2,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1311,7 +1311,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,2,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
@ -1322,7 +1322,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x dou
; CHECK-LABEL: test_masked_4xdouble_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1335,7 +1335,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
@ -1346,7 +1346,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x dou
; CHECK-LABEL: test_masked_4xdouble_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $9, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[0,1,3,3]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1359,7 +1359,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $9, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
@ -1378,7 +1378,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x dou
; CHECK-LABEL: test_masked_4xdouble_perm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1391,7 +1391,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
@ -1411,7 +1411,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1424,7 +1424,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $13, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1437,7 +1437,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1450,7 +1450,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,3,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1463,7 +1463,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1476,7 +1476,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,3]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1498,7 +1498,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x
; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1511,7 +1511,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,2]
; CHECK-NEXT: retq
%vec = load <4 x double>, <4 x double>* %vp
@ -1532,7 +1532,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x dou
; CHECK-LABEL: test_masked_8xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-107, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,3,2,4,5,7,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1545,7 +1545,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-107, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,3,2,4,5,7,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 2, i32 4, i32 5, i32 7, i32 6>
@ -1556,7 +1556,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask1(<8 x double> %vec, <8 x dou
; CHECK-LABEL: test_masked_8xdouble_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-39, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,7,6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1569,7 +1569,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask1(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-39, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,7,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 6>
@ -1580,7 +1580,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x dou
; CHECK-LABEL: test_masked_8xdouble_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-53, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,3,5,5,6,7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1593,7 +1593,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-53, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,3,5,5,6,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 5, i32 5, i32 6, i32 7>
@ -1612,7 +1612,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask3(<8 x double> %vec, <8 x dou
; CHECK-LABEL: test_masked_8xdouble_perm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-89, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,2,4,4,6,7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1625,7 +1625,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask3(<8 x double> %vec) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-89, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 7>
@ -1645,7 +1645,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-95, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,5,4,7,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1658,7 +1658,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-95, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,5,4,7,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1671,7 +1671,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $27, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,3,3,4,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1684,7 +1684,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask1(<8 x double>* %vp) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $27, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,3,3,4,5,7,7]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1697,7 +1697,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,4,7,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1710,7 +1710,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-116, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,4,7,6]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1732,7 +1732,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x
; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $89, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,0,3,2,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp
@ -1745,7 +1745,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask3(<8 x double>* %vp) {
; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $89, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,2,4,5,6,7]
; CHECK-NEXT: retq
%vec = load <8 x double>, <8 x double>* %vp

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
; FIXME: All cases here should be fixed by PR34380
@ -4146,10 +4146,10 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %v
; CHECK: # BB#0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <2 x i32> <i32 3, i32 7>
@ -4162,9 +4162,10 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask1(<8 x double>
; CHECK: # BB#0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <2 x i32> <i32 3, i32 7>

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) {
; CHECK-LABEL: test_16xi16_perm_mask0:

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
define <4 x float> @test_4xfloat_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2) {
; CHECK-LABEL: test_4xfloat_shuff_mask0:
@ -13,7 +13,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x floa
; CHECK-LABEL: test_4xfloat_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
@ -26,7 +26,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],xmm1[3,1]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
@ -37,7 +37,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x floa
; CHECK-LABEL: test_4xfloat_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,2],xmm1[3,2]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
@ -50,7 +50,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2],xmm1[3,2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 2, i32 7, i32 6>
@ -61,7 +61,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x floa
; CHECK-LABEL: test_4xfloat_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,3],xmm1[2,1]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
@ -74,7 +74,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask2(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm1[2,1]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 5>
@ -93,7 +93,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x floa
; CHECK-LABEL: test_4xfloat_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
@ -106,7 +106,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask3(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $3, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],xmm1[3,3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
@ -126,7 +126,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,0],mem[1,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -140,7 +140,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1,
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0],mem[1,2]
; CHECK-NEXT: retq
%vec2 = load <4 x float>, <4 x float>* %vec2p
@ -153,7 +153,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,3],mem[1,3]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -167,7 +167,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1,
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $10, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],mem[1,3]
; CHECK-NEXT: retq
%vec2 = load <4 x float>, <4 x float>* %vec2p
@ -180,7 +180,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],mem[2,0]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -194,7 +194,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask2(<4 x float> %vec1,
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],mem[2,0]
; CHECK-NEXT: retq
%vec2 = load <4 x float>, <4 x float>* %vec2p
@ -216,7 +216,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x
; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[2,1],mem[3,2]
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
@ -230,7 +230,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask3(<4 x float> %vec1,
; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],mem[3,2]
; CHECK-NEXT: retq
%vec2 = load <4 x float>, <4 x float>* %vec2p
@ -251,7 +251,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x floa
; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
@ -264,7 +264,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
@ -275,7 +275,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x floa
; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
@ -288,7 +288,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $126, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 3, i32 11, i32 9, i32 4, i32 7, i32 15, i32 13>
@ -299,7 +299,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x floa
; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-35, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
@ -312,7 +312,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-35, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 2, i32 10, i32 10, i32 4, i32 6, i32 14, i32 14>
@ -331,7 +331,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x floa
; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
@ -344,7 +344,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $62, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
@ -364,7 +364,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-106, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -378,7 +378,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1,
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-106, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
; CHECK-NEXT: retq
%vec2 = load <8 x float>, <8 x float>* %vec2p
@ -391,7 +391,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -405,7 +405,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1,
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $114, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
; CHECK-NEXT: retq
%vec2 = load <8 x float>, <8 x float>* %vec2p
@ -418,7 +418,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-104, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -432,7 +432,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1,
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-104, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
; CHECK-NEXT: retq
%vec2 = load <8 x float>, <8 x float>* %vec2p
@ -454,7 +454,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x
; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
@ -468,7 +468,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1,
; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $98, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
; CHECK-NEXT: retq
%vec2 = load <8 x float>, <8 x float>* %vec2p
@ -489,7 +489,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x
; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-19315, %ax # imm = 0xB48D
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
@ -502,7 +502,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-19315, %ax # imm = 0xB48D
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
@ -513,7 +513,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x
; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $18064, %ax # imm = 0x4690
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
@ -526,7 +526,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $18064, %ax # imm = 0x4690
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 1, i32 2, i32 19, i32 19, i32 5, i32 6, i32 23, i32 23, i32 9, i32 10, i32 27, i32 27, i32 13, i32 14, i32 31, i32 31>
@ -537,7 +537,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x
; CHECK-LABEL: test_16xfloat_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-12346, %ax # imm = 0xCFC6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
@ -550,7 +550,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-12346, %ax # imm = 0xCFC6
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 18, i32 17, i32 7, i32 4, i32 22, i32 21, i32 11, i32 8, i32 26, i32 25, i32 15, i32 12, i32 30, i32 29>
@ -569,7 +569,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x
; CHECK-LABEL: test_16xfloat_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-9865, %ax # imm = 0xD977
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
@ -582,7 +582,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-9865, %ax # imm = 0xD977
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30>
@ -602,7 +602,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $7677, %ax # imm = 0x1DFD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -616,7 +616,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movw $7677, %ax # imm = 0x1DFD
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
; CHECK-NEXT: retq
%vec2 = load <16 x float>, <16 x float>* %vec2p
@ -629,7 +629,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $14448, %ax # imm = 0x3870
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -643,7 +643,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movw $14448, %ax # imm = 0x3870
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14]
; CHECK-NEXT: retq
%vec2 = load <16 x float>, <16 x float>* %vec2p
@ -656,7 +656,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-13463, %ax # imm = 0xCB69
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -670,7 +670,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movw $-13463, %ax # imm = 0xCB69
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14]
; CHECK-NEXT: retq
%vec2 = load <16 x float>, <16 x float>* %vec2p
@ -692,7 +692,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1
; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $21793, %ax # imm = 0x5521
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
@ -706,7 +706,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec
; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movw $21793, %ax # imm = 0x5521
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
; CHECK-NEXT: retq
%vec2 = load <16 x float>, <16 x float>* %vec2p
@ -727,7 +727,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x d
; CHECK-LABEL: test_2xdouble_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
@ -740,7 +740,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, <
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; CHECK-NEXT: retq
%shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
@ -751,7 +751,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> %vec1, <2 x d
; CHECK-LABEL: test_2xdouble_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
@ -764,7 +764,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mask1(<2 x double> %vec1, <
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; CHECK-NEXT: retq
%shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
@ -784,7 +784,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2
; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
@ -798,7 +798,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0]
; CHECK-NEXT: retq
%vec2 = load <2 x double>, <2 x double>* %vec2p
@ -811,7 +811,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2
; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0]
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
@ -825,7 +825,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask1(<2 x double> %vec
; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0]
; CHECK-NEXT: retq
%vec2 = load <2 x double>, <2 x double>* %vec2p
@ -846,7 +846,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d
; CHECK-LABEL: test_4xdouble_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
@ -859,7 +859,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
@ -870,7 +870,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d
; CHECK-LABEL: test_4xdouble_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
@ -883,7 +883,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
@ -894,7 +894,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d
; CHECK-LABEL: test_4xdouble_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
@ -907,7 +907,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $6, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
@ -926,7 +926,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d
; CHECK-LABEL: test_4xdouble_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
@ -939,7 +939,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $2, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
; CHECK-NEXT: retq
%shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
@ -959,7 +959,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -973,7 +973,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $5, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[2]
; CHECK-NEXT: retq
%vec2 = load <4 x double>, <4 x double>* %vec2p
@ -986,7 +986,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1000,7 +1000,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $4, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: retq
%vec2 = load <4 x double>, <4 x double>* %vec2p
@ -1013,7 +1013,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[3],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1027,7 +1027,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $14, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[3],mem[2]
; CHECK-NEXT: retq
%vec2 = load <4 x double>, <4 x double>* %vec2p
@ -1049,7 +1049,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4
; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
@ -1063,7 +1063,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec
; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $11, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[2],mem[2]
; CHECK-NEXT: retq
%vec2 = load <4 x double>, <4 x double>* %vec2p
@ -1084,7 +1084,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d
; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-77, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
@ -1097,7 +1097,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-77, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
@ -1108,7 +1108,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d
; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $107, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
@ -1121,7 +1121,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $107, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 5, i32 13, i32 6, i32 15>
@ -1132,7 +1132,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d
; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-87, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
@ -1145,7 +1145,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-87, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 13, i32 6, i32 14>
@ -1164,7 +1164,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d
; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
@ -1177,7 +1177,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $12, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
@ -1197,7 +1197,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $72, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1211,7 +1211,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: movb $72, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
; CHECK-NEXT: retq
%vec2 = load <8 x double>, <8 x double>* %vec2p
@ -1224,7 +1224,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1238,7 +1238,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-7, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
; CHECK-NEXT: retq
%vec2 = load <8 x double>, <8 x double>* %vec2p
@ -1251,7 +1251,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $26, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1265,7 +1265,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
; CHECK: # BB#0:
; CHECK-NEXT: movb $26, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT: retq
%vec2 = load <8 x double>, <8 x double>* %vec2p
@ -1287,7 +1287,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8
; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-39, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
@ -1301,7 +1301,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec
; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
; CHECK: # BB#0:
; CHECK-NEXT: movb $-39, %al
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
; CHECK-NEXT: retq
%vec2 = load <8 x double>, <8 x double>* %vec2p

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) {
; CHECK-LABEL: test_16xi8_perm_mask0:

File diff suppressed because it is too large Load Diff