From 1f59076196f6f8d66ef0bf643b2a1a242e4ebab2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 3 Jul 2016 13:55:41 +0000
Subject: [PATCH] [X86][AVX512] Add support for VPERM/VSHUF masked shuffle comments

llvm-svn: 274462
---
 .../X86/InstPrinter/X86InstComments.cpp       | 56 +++++++++++++++++++
 llvm/test/CodeGen/X86/avx512-intrinsics.ll    | 40 ++++++-------
 llvm/test/CodeGen/X86/avx512dq-intrinsics.ll  | 16 +++---
 .../test/CodeGen/X86/avx512dqvl-intrinsics.ll | 10 ++--
 llvm/test/CodeGen/X86/avx512vl-intrinsics.ll  | 46 +++++++--------
 .../test/CodeGen/X86/vector-shuffle-512-v8.ll | 16 +++---
 .../X86/vector-shuffle-combining-avx512bw.ll  |  4 +-
 7 files changed, 122 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 393ae3c0e1dc..fbf262117496 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -90,6 +90,16 @@ using namespace llvm;
   CASE_AVX_INS_COMMON(Inst, Y, suf) \
   CASE_SSE_INS_COMMON(Inst, suf)

+#define CASE_MASK_SHUF(Inst, src) \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src##i)
+
+#define CASE_MASKZ_SHUF(Inst, src) \
+  CASE_MASKZ_INS_COMMON(Inst, Z, r##src##i) \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src##i) \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src##i)
+
 #define CASE_VPERM(Inst, src) \
   CASE_AVX512_INS_COMMON(Inst, Z, src##i) \
   CASE_AVX512_INS_COMMON(Inst, Z256, src##i) \
@@ -97,12 +107,34 @@ using namespace llvm;
   CASE_AVX_INS_COMMON(Inst, , src##i) \
   CASE_AVX_INS_COMMON(Inst, Y, src##i)

+#define CASE_MASK_VPERM(Inst, src) \
+  CASE_MASK_INS_COMMON(Inst, Z, src##i) \
+  CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
+  CASE_MASK_INS_COMMON(Inst, Z128, src##i)
+
+#define CASE_MASKZ_VPERM(Inst, src) \
+  CASE_MASKZ_INS_COMMON(Inst, Z, src##i) \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, src##i) \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, src##i)
+
 #define CASE_VSHUF(Inst, src) \
   CASE_AVX512_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
   CASE_AVX512_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
   CASE_AVX512_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
   CASE_AVX512_INS_COMMON(SHUFI##Inst, Z256, r##src##i)

+#define CASE_MASK_VSHUF(Inst, src) \
+  CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
+  CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
+  CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
+  CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
+
+#define CASE_MASKZ_VSHUF(Inst, src) \
+  CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
+  CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
+  CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
+  CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
+
 static unsigned getVectorRegSize(unsigned RegNo) {
   if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
     return 512;
@@ -178,6 +210,18 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
   CASE_MASKZ_PMOVZX(PMOVZXWD, r)
   CASE_MASKZ_PMOVZX(PMOVZXWQ, m)
   CASE_MASKZ_PMOVZX(PMOVZXWQ, r)
+  CASE_MASKZ_SHUF(SHUFPD, m)
+  CASE_MASKZ_SHUF(SHUFPD, r)
+  CASE_MASKZ_SHUF(SHUFPS, m)
+  CASE_MASKZ_SHUF(SHUFPS, r)
+  CASE_MASKZ_VPERM(PERMILPD, m)
+  CASE_MASKZ_VPERM(PERMILPD, r)
+  CASE_MASKZ_VPERM(PERMILPS, m)
+  CASE_MASKZ_VPERM(PERMILPS, r)
+  CASE_MASKZ_VSHUF(64X2, m)
+  CASE_MASKZ_VSHUF(64X2, r)
+  CASE_MASKZ_VSHUF(32X4, m)
+  CASE_MASKZ_VSHUF(32X4, r)
     MaskWithZero = true;
     MaskRegName = getRegName(MI->getOperand(1).getReg());
     break;
@@ -199,6 +243,18 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
   CASE_MASK_PMOVZX(PMOVZXWD, r)
   CASE_MASK_PMOVZX(PMOVZXWQ, m)
   CASE_MASK_PMOVZX(PMOVZXWQ, r)
+  CASE_MASK_SHUF(SHUFPD, m)
+  CASE_MASK_SHUF(SHUFPD, r)
+  CASE_MASK_SHUF(SHUFPS, m)
+  CASE_MASK_SHUF(SHUFPS, r)
+  CASE_MASK_VPERM(PERMILPD, m)
+  CASE_MASK_VPERM(PERMILPD, r)
+  CASE_MASK_VPERM(PERMILPS, m)
+  CASE_MASK_VPERM(PERMILPS, r)
+  CASE_MASK_VSHUF(64X2, m)
+  CASE_MASK_VSHUF(64X2, r)
+  CASE_MASK_VSHUF(32X4, m)
+  CASE_MASK_VSHUF(32X4, r)
     MaskRegName = getRegName(MI->getOperand(2).getReg());
     break;
   }
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 23fbcac642a9..d2bdf5a87f13 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -5255,7 +5255,7 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
+; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
 ; CHECK-NEXT: retq
@@ -5271,8 +5271,8 @@ define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x d
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
-; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
@@ -5292,7 +5292,7 @@ define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32>
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
+; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
 ; CHECK-NEXT: retq
@@ -5308,7 +5308,7 @@ define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
 ; CHECK-NEXT: retq
@@ -5407,8 +5407,8 @@ define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x
 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
-; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-NEXT: vshufpd {{.*#+}} zmm3 {%k1} {z} =
zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 @@ -5428,7 +5428,7 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] +; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12] ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -5444,8 +5444,8 @@ define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm0[0,1,3,2,5,4,6,6] -; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = zmm0[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6] ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6] ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 @@ -5464,8 +5464,8 @@ define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <1 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] -; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 @@ -5866,8 +5866,8 @@ define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 @@ -5887,8 +5887,8 @@ define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3] -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vaddpd %zmm0, 
%zmm2, %zmm0 @@ -5908,8 +5908,8 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 @@ -5929,8 +5929,8 @@ define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3] -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index 5d9139fbf770..434495b9f725 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -637,8 +637,8 @@ define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 @@ -658,8 +658,8 @@ define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1] -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 @@ -679,8 +679,8 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; 
CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 @@ -700,8 +700,8 @@ define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1] -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 1ef679a687e2..19cffba6c43f 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) { @@ -2369,9 +2369,9 @@ define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,0,1] +; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1] ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00] -; CHECK-NEXT: ## ymm1 = ymm0[0,1,0,1] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1] ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1] ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] @@ -2393,9 +2393,9 @@ define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,0,1] +; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1] ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00] -; CHECK-NEXT: ## ymm1 = ymm0[0,1,0,1] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1] ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1] ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1] diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 98db1ae92a04..d951b406396d 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5414,9 +5414,9 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; CHECK-NEXT: ## ymm2 {%k1} = 
ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd9,0x16] -; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] @@ -5437,9 +5437,9 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd9,0x16] -; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3] +; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] @@ -5460,7 +5460,7 @@ define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] @@ -5478,7 +5478,7 @@ define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] @@ -5584,9 +5584,9 @@ define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x16] -; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[1] +; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[1] ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xd9,0x16] -; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[1] +; CHECK-NEXT: ## xmm3 {%k1} {z} = xmm0[0],xmm1[1] ; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc6,0xc1,0x16] ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1] ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] @@ -5607,7 +5607,7 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2] ; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc6,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] @@ -5625,7 +5625,7 @@ define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x fl ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16] -; CHECK-NEXT: ## xmm2 = xmm0[2,1],xmm1[1,0] +; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2,1],xmm1[1,0] ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc6,0xc1,0x16] ; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0] ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] @@ -5643,7 +5643,7 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x fl ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] +; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc6,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] @@ -5729,9 +5729,9 @@ define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x16] -; CHECK-NEXT: ## ymm1 = ymm0[0,1,3,2] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2] ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xd0,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2] +; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,3,2] ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xc0,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2] ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca] @@ -5752,9 +5752,9 @@ define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] -; CHECK-NEXT: ## xmm1 = xmm0[1,0] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xd0,0x01] -; CHECK-NEXT: ## xmm2 = xmm0[1,0] +; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[1,0] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xc0,0x01] ; CHECK-NEXT: ## xmm0 = xmm0[1,0] ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca] @@ -5775,9 +5775,9 @@ define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: 
[0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] -; CHECK-NEXT: ## ymm1 = ymm0[2,1,1,0,6,5,5,4] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xd0,0x16] -; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4] +; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xc0,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] @@ -5798,9 +5798,9 @@ define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] -; CHECK-NEXT: ## xmm1 = xmm0[2,1,1,0] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xd0,0x16] -; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0] +; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[2,1,1,0] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xc0,0x16] ; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0] ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] @@ -6575,9 +6575,9 @@ define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd0,0x00] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xc8,0x00] -; CHECK-NEXT: ## ymm1 = ymm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] @@ -6598,9 +6598,9 @@ define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x43,0xd0,0x00] -; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xc8,0x00] -; CHECK-NEXT: ## ymm1 = ymm0[0,1,2,3,0,1,2,3] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index e9b46b95061e..3edcfd980770 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -2126,7 +2126,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1 -; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = 
zmm0[0,1,4,5],zmm1[2,3,0,1] +; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: test_vshuff64x2_512_maskz: @@ -2134,7 +2134,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1 ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1 -; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1] +; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1] ; AVX512F-32-NEXT: retl %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer @@ -2147,7 +2147,7 @@ define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2 ; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1 -; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1] +; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: test_vshufi64x2_512_mask: @@ -2155,7 +2155,7 @@ define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2 ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1] +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1] ; AVX512F-32-NEXT: retl %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x @@ -2184,7 +2184,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1] +; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask: @@ -2193,7 +2193,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1] +; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1] ; AVX512F-32-NEXT: retl %x1 = load <8 x double>,<8 x double> *%ptr,align 1 %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> @@ -2207,7 +2207,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 -; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1] +; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz: @@ -2216,7 +2216,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> ; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1] +; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0,1,4,5],mem[2,3,0,1] ; AVX512F-32-NEXT: retl %x1 = load <8 x double>,<8 x double> *%ptr,align 1 %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index f6659937edec..3c09c1cbaef5 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -233,7 +233,7 @@ define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask(<16 x float> %x0, < ; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask: ; CHECK: # BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; CHECK-NEXT: retq %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m) ret <16 x float> %res0 @@ -242,7 +242,7 @@ define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask_load(<16 x float> * ; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask_load: ; CHECK: # BB#0: ; CHECK-NEXT: kmovw %esi, %k1 -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] ; CHECK-NEXT: retq %x0 = load <16 x float>, <16 x float> *%p0 %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> , <16 x float> %x0, <16 x float> %x1, i16 %m)