forked from OSchip/llvm-project
[X86][AVX512] Add support for VPERM/VSHUF masked shuffle comments
llvm-svn: 274462
This commit is contained in:
parent
68f438a036
commit
1f59076196
|
@ -90,6 +90,16 @@ using namespace llvm;
|
|||
CASE_AVX_INS_COMMON(Inst, Y, suf) \
|
||||
CASE_SSE_INS_COMMON(Inst, suf)
|
||||
|
||||
#define CASE_MASK_SHUF(Inst, src) \
|
||||
CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
|
||||
CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
|
||||
CASE_MASK_INS_COMMON(Inst, Z128, r##src##i)
|
||||
|
||||
#define CASE_MASKZ_SHUF(Inst, src) \
|
||||
CASE_MASKZ_INS_COMMON(Inst, Z, r##src##i) \
|
||||
CASE_MASKZ_INS_COMMON(Inst, Z256, r##src##i) \
|
||||
CASE_MASKZ_INS_COMMON(Inst, Z128, r##src##i)
|
||||
|
||||
#define CASE_VPERM(Inst, src) \
|
||||
CASE_AVX512_INS_COMMON(Inst, Z, src##i) \
|
||||
CASE_AVX512_INS_COMMON(Inst, Z256, src##i) \
|
||||
|
@ -97,12 +107,34 @@ using namespace llvm;
|
|||
CASE_AVX_INS_COMMON(Inst, , src##i) \
|
||||
CASE_AVX_INS_COMMON(Inst, Y, src##i)
|
||||
|
||||
#define CASE_MASK_VPERM(Inst, src) \
|
||||
CASE_MASK_INS_COMMON(Inst, Z, src##i) \
|
||||
CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
|
||||
CASE_MASK_INS_COMMON(Inst, Z128, src##i)
|
||||
|
||||
#define CASE_MASKZ_VPERM(Inst, src) \
|
||||
CASE_MASKZ_INS_COMMON(Inst, Z, src##i) \
|
||||
CASE_MASKZ_INS_COMMON(Inst, Z256, src##i) \
|
||||
CASE_MASKZ_INS_COMMON(Inst, Z128, src##i)
|
||||
|
||||
#define CASE_VSHUF(Inst, src) \
|
||||
CASE_AVX512_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
|
||||
CASE_AVX512_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
|
||||
CASE_AVX512_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
|
||||
CASE_AVX512_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
|
||||
|
||||
#define CASE_MASK_VSHUF(Inst, src) \
|
||||
CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
|
||||
CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
|
||||
CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
|
||||
CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
|
||||
|
||||
#define CASE_MASKZ_VSHUF(Inst, src) \
|
||||
CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
|
||||
CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
|
||||
CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
|
||||
CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
|
||||
|
||||
static unsigned getVectorRegSize(unsigned RegNo) {
|
||||
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
|
||||
return 512;
|
||||
|
@ -178,6 +210,18 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
|
|||
CASE_MASKZ_PMOVZX(PMOVZXWD, r)
|
||||
CASE_MASKZ_PMOVZX(PMOVZXWQ, m)
|
||||
CASE_MASKZ_PMOVZX(PMOVZXWQ, r)
|
||||
CASE_MASKZ_SHUF(SHUFPD, m)
|
||||
CASE_MASKZ_SHUF(SHUFPD, r)
|
||||
CASE_MASKZ_SHUF(SHUFPS, m)
|
||||
CASE_MASKZ_SHUF(SHUFPS, r)
|
||||
CASE_MASKZ_VPERM(PERMILPD, m)
|
||||
CASE_MASKZ_VPERM(PERMILPD, r)
|
||||
CASE_MASKZ_VPERM(PERMILPS, m)
|
||||
CASE_MASKZ_VPERM(PERMILPS, r)
|
||||
CASE_MASKZ_VSHUF(64X2, m)
|
||||
CASE_MASKZ_VSHUF(64X2, r)
|
||||
CASE_MASKZ_VSHUF(32X4, m)
|
||||
CASE_MASKZ_VSHUF(32X4, r)
|
||||
MaskWithZero = true;
|
||||
MaskRegName = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
|
@ -199,6 +243,18 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
|
|||
CASE_MASK_PMOVZX(PMOVZXWD, r)
|
||||
CASE_MASK_PMOVZX(PMOVZXWQ, m)
|
||||
CASE_MASK_PMOVZX(PMOVZXWQ, r)
|
||||
CASE_MASK_SHUF(SHUFPD, m)
|
||||
CASE_MASK_SHUF(SHUFPD, r)
|
||||
CASE_MASK_SHUF(SHUFPS, m)
|
||||
CASE_MASK_SHUF(SHUFPS, r)
|
||||
CASE_MASK_VPERM(PERMILPD, m)
|
||||
CASE_MASK_VPERM(PERMILPD, r)
|
||||
CASE_MASK_VPERM(PERMILPS, m)
|
||||
CASE_MASK_VPERM(PERMILPS, r)
|
||||
CASE_MASK_VSHUF(64X2, m)
|
||||
CASE_MASK_VSHUF(64X2, r)
|
||||
CASE_MASK_VSHUF(32X4, m)
|
||||
CASE_MASK_VSHUF(32X4, r)
|
||||
MaskRegName = getRegName(MI->getOperand(2).getReg());
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -5255,7 +5255,7 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -5271,8 +5271,8 @@ define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x d
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
|
||||
|
@ -5292,7 +5292,7 @@ define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32>
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -5308,7 +5308,7 @@ define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
|
||||
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -5407,8 +5407,8 @@ define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
|
||||
; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
|
||||
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
|
||||
; CHECK-NEXT: vshufpd {{.*#+}} zmm3 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
|
||||
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
|
||||
|
@ -5428,7 +5428,7 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
|
||||
; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
|
||||
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -5444,8 +5444,8 @@ define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm0[0,1,3,2,5,4,6,6]
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = zmm0[0,1,3,2,5,4,6,6]
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
|
||||
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
|
||||
|
@ -5464,8 +5464,8 @@ define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <1
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
|
||||
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
|
||||
|
@ -5866,8 +5866,8 @@ define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0,
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
|
||||
|
@ -5887,8 +5887,8 @@ define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
|
@ -5908,8 +5908,8 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
|
||||
|
@ -5929,8 +5929,8 @@ define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
|
|
|
@ -637,8 +637,8 @@ define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0,
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
|
||||
|
@ -658,8 +658,8 @@ define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
|
@ -679,8 +679,8 @@ define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
|
||||
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
|
||||
|
@ -700,8 +700,8 @@ define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
|
||||
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s
|
||||
|
||||
define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
|
||||
|
@ -2369,9 +2369,9 @@ define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00]
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
|
||||
|
@ -2393,9 +2393,9 @@ define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00]
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1]
|
||||
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
|
||||
|
|
|
@ -5414,9 +5414,9 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd9,0x16]
|
||||
; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
|
||||
|
@ -5437,9 +5437,9 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd9,0x16]
|
||||
; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
|
||||
|
@ -5460,7 +5460,7 @@ define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
|
||||
|
@ -5478,7 +5478,7 @@ define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
|
||||
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
|
||||
|
@ -5584,9 +5584,9 @@ define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x16]
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xd9,0x16]
|
||||
; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: ## xmm3 {%k1} {z} = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc6,0xc1,0x16]
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1]
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
|
||||
|
@ -5607,7 +5607,7 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
|
||||
; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc6,0xc1,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
|
||||
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
|
||||
|
@ -5625,7 +5625,7 @@ define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x fl
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[2,1],xmm1[1,0]
|
||||
; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
|
||||
; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc6,0xc1,0x16]
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0]
|
||||
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
|
||||
|
@ -5643,7 +5643,7 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x fl
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
|
||||
; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc6,0xc1,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
|
||||
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
|
||||
|
@ -5729,9 +5729,9 @@ define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x16]
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[0,1,3,2]
|
||||
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2]
|
||||
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xd0,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,3,2]
|
||||
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xc0,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
|
||||
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
|
||||
|
@ -5752,9 +5752,9 @@ define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[1,0]
|
||||
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0]
|
||||
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xd0,0x01]
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[1,0]
|
||||
; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[1,0]
|
||||
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xc0,0x01]
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[1,0]
|
||||
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
|
||||
|
@ -5775,9 +5775,9 @@ define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[2,1,1,0,6,5,5,4]
|
||||
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
|
||||
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xd0,0x16]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
|
||||
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xc0,0x16]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
|
||||
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
|
||||
|
@ -5798,9 +5798,9 @@ define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[2,1,1,0]
|
||||
; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0]
|
||||
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xd0,0x16]
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0]
|
||||
; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[2,1,1,0]
|
||||
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xc0,0x16]
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
|
||||
; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
|
||||
|
@ -6575,9 +6575,9 @@ define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0,
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd0,0x00]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xc8,0x00]
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc0,0x00]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
|
||||
|
@ -6598,9 +6598,9 @@ define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x
|
|||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x43,0xd0,0x00]
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xc8,0x00]
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc0,0x00]
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3]
|
||||
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
|
||||
|
|
|
@ -2126,7 +2126,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1
|
|||
; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
|
||||
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
|
||||
|
@ -2134,7 +2134,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1
|
|||
; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
|
||||
; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
|
||||
; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
|
||||
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-32-NEXT: retl
|
||||
%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
|
||||
%res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
|
||||
|
@ -2147,7 +2147,7 @@ define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1>
|
|||
; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
|
||||
|
@ -2155,7 +2155,7 @@ define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1>
|
|||
; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
|
||||
; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
|
||||
; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
|
||||
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
|
||||
; AVX512F-32-NEXT: retl
|
||||
%y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
|
||||
%res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
|
||||
|
@ -2184,7 +2184,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double>
|
|||
; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
|
||||
|
@ -2193,7 +2193,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double>
|
|||
; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-32-NEXT: retl
|
||||
%x1 = load <8 x double>,<8 x double> *%ptr,align 1
|
||||
%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
|
||||
|
@ -2207,7 +2207,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double>
|
|||
; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
|
||||
|
@ -2216,7 +2216,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double>
|
|||
; AVX512F-32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
|
||||
; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
|
||||
; AVX512F-32-NEXT: retl
|
||||
%x1 = load <8 x double>,<8 x double> *%ptr,align 1
|
||||
%y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
|
||||
|
|
|
@ -233,7 +233,7 @@ define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask(<16 x float> %x0, <
|
|||
; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)
|
||||
ret <16 x float> %res0
|
||||
|
@ -242,7 +242,7 @@ define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask_load(<16 x float> *
|
|||
; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask_load:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
; CHECK-NEXT: retq
|
||||
%x0 = load <16 x float>, <16 x float> *%p0
|
||||
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)
|
||||
|
|
Loading…
Reference in New Issue