From 4078c75bd45e0dd26c6171c9b774671b3e291ee0 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 4 Jun 2015 07:07:13 +0000 Subject: [PATCH] AVX-512: added all SKX forms of VPERMW/D/Q instructions. Added all forms of VPERMPS/PD instructions. Added encoding tests. llvm-svn: 239016 --- llvm/lib/Target/X86/X86InstrAVX512.td | 88 ++++--- llvm/test/MC/X86/avx512-encodings.s | 344 ++++++++++++++++++++++++++ llvm/test/MC/X86/x86-64-avx512bw.s | 35 +++ 3 files changed, 429 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5d5ab14cf460..7d16c22909d8 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1047,12 +1047,6 @@ multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, EVEX_4V; } } - -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, - EVEX_V512, VEX_W; -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, - EVEX_V512, VEX_W; - defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, EVEX_V512; defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, @@ -1063,37 +1057,6 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), (VPERMILPDZri VR512:$src1, imm:$imm)>; -// -- VPERM - register form -- -multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { - - def rr : AVX5128I, EVEX_4V; - - def rm : AVX5128I, - EVEX_4V; -} - -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = 
SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, @@ -3839,7 +3802,8 @@ multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } @@ -3901,6 +3865,54 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +//===-------------------------------------------------------------------===// +// 1-src variable permutation VPERMW/D/Q +//===-------------------------------------------------------------------===// +multiclass avx512_vperm_dq_sizes opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; +} + +multiclass avx512_vpermi_dq_sizes opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in + defm Z256: avx512_shift_rmi, + avx512_shift_rmbi, EVEX_V256; +} + + +defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>; + +defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, + avx512vl_i32_info>; +defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, + avx512vl_i64_info>, VEX_W; +defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, + avx512vl_f32_info>; +defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, + avx512vl_f64_info>, VEX_W; + +defm VPERMQ : avx512_vpermi_dq_sizes<0x00, 
MRMSrcReg, MRMSrcMem, "vpermq", + X86VPermi, avx512vl_i64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", + X86VPermi, avx512vl_f64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s index 5b507a70a77e..085ccee85ecb 100644 --- a/llvm/test/MC/X86/avx512-encodings.s +++ b/llvm/test/MC/X86/avx512-encodings.s @@ -8408,3 +8408,347 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0x62,0xf3,0xfd,0x50,0x43,0x9a,0xf8,0xfb,0xff,0xff,0x7b] vshufi64x2 $123, -1032(%rdx){1to8}, %zmm16, %zmm3 +// CHECK: vpermps %zmm24, %zmm2, %zmm4 +// CHECK: encoding: [0x62,0x92,0x6d,0x48,0x16,0xe0] + vpermps %zmm24, %zmm2, %zmm4 + +// CHECK: vpermps %zmm24, %zmm2, %zmm4 {%k4} +// CHECK: encoding: [0x62,0x92,0x6d,0x4c,0x16,0xe0] + vpermps %zmm24, %zmm2, %zmm4 {%k4} + +// CHECK: vpermps %zmm24, %zmm2, %zmm4 {%k4} {z} +// CHECK: encoding: [0x62,0x92,0x6d,0xcc,0x16,0xe0] + vpermps %zmm24, %zmm2, %zmm4 {%k4} {z} + +// CHECK: vpermps (%rcx), %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0x21] + vpermps (%rcx), %zmm2, %zmm4 + +// CHECK: vpermps 291(%rax,%r14,8), %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpermps 291(%rax,%r14,8), %zmm2, %zmm4 + +// CHECK: vpermps (%rcx){1to16}, %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0x21] + vpermps (%rcx){1to16}, %zmm2, %zmm4 + +// CHECK: vpermps 8128(%rdx), %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0x62,0x7f] + vpermps 8128(%rdx), %zmm2, %zmm4 + +// CHECK: vpermps 8192(%rdx), %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0xa2,0x00,0x20,0x00,0x00] + vpermps 
8192(%rdx), %zmm2, %zmm4 + +// CHECK: vpermps -8192(%rdx), %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0x62,0x80] + vpermps -8192(%rdx), %zmm2, %zmm4 + +// CHECK: vpermps -8256(%rdx), %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x16,0xa2,0xc0,0xdf,0xff,0xff] + vpermps -8256(%rdx), %zmm2, %zmm4 + +// CHECK: vpermps 508(%rdx){1to16}, %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0x62,0x7f] + vpermps 508(%rdx){1to16}, %zmm2, %zmm4 + +// CHECK: vpermps 512(%rdx){1to16}, %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0xa2,0x00,0x02,0x00,0x00] + vpermps 512(%rdx){1to16}, %zmm2, %zmm4 + +// CHECK: vpermps -512(%rdx){1to16}, %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0x62,0x80] + vpermps -512(%rdx){1to16}, %zmm2, %zmm4 + +// CHECK: vpermps -516(%rdx){1to16}, %zmm2, %zmm4 +// CHECK: encoding: [0x62,0xf2,0x6d,0x58,0x16,0xa2,0xfc,0xfd,0xff,0xff] + vpermps -516(%rdx){1to16}, %zmm2, %zmm4 + +// CHECK: vpermq $171, %zmm4, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0xcc,0xab] + vpermq $171, %zmm4, %zmm25 + +// CHECK: vpermq $171, %zmm4, %zmm25 {%k6} +// CHECK: encoding: [0x62,0x63,0xfd,0x4e,0x00,0xcc,0xab] + vpermq $171, %zmm4, %zmm25 {%k6} + +// CHECK: vpermq $171, %zmm4, %zmm25 {%k6} {z} +// CHECK: encoding: [0x62,0x63,0xfd,0xce,0x00,0xcc,0xab] + vpermq $171, %zmm4, %zmm25 {%k6} {z} + +// CHECK: vpermq $123, %zmm4, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0xcc,0x7b] + vpermq $123, %zmm4, %zmm25 + +// CHECK: vpermq $123, (%rcx), %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x09,0x7b] + vpermq $123, (%rcx), %zmm25 + +// CHECK: vpermq $123, 291(%rax,%r14,8), %zmm25 +// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x00,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermq $123, 291(%rax,%r14,8), %zmm25 + +// CHECK: vpermq $123, (%rcx){1to8}, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x09,0x7b] + vpermq $123, (%rcx){1to8}, %zmm25 + +// CHECK: vpermq $123, 8128(%rdx), %zmm25 +// CHECK: 
encoding: [0x62,0x63,0xfd,0x48,0x00,0x4a,0x7f,0x7b] + vpermq $123, 8128(%rdx), %zmm25 + +// CHECK: vpermq $123, 8192(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x8a,0x00,0x20,0x00,0x00,0x7b] + vpermq $123, 8192(%rdx), %zmm25 + +// CHECK: vpermq $123, -8192(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x4a,0x80,0x7b] + vpermq $123, -8192(%rdx), %zmm25 + +// CHECK: vpermq $123, -8256(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x00,0x8a,0xc0,0xdf,0xff,0xff,0x7b] + vpermq $123, -8256(%rdx), %zmm25 + +// CHECK: vpermq $123, 1016(%rdx){1to8}, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x4a,0x7f,0x7b] + vpermq $123, 1016(%rdx){1to8}, %zmm25 + +// CHECK: vpermq $123, 1024(%rdx){1to8}, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x8a,0x00,0x04,0x00,0x00,0x7b] + vpermq $123, 1024(%rdx){1to8}, %zmm25 + +// CHECK: vpermq $123, -1024(%rdx){1to8}, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x4a,0x80,0x7b] + vpermq $123, -1024(%rdx){1to8}, %zmm25 + +// CHECK: vpermq $123, -1032(%rdx){1to8}, %zmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x58,0x00,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vpermq $123, -1032(%rdx){1to8}, %zmm25 + +// CHECK: vpermq %zmm22, %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x36,0xee] + vpermq %zmm22, %zmm23, %zmm21 + +// CHECK: vpermq %zmm22, %zmm23, %zmm21 {%k1} +// CHECK: encoding: [0x62,0xa2,0xc5,0x41,0x36,0xee] + vpermq %zmm22, %zmm23, %zmm21 {%k1} + +// CHECK: vpermq %zmm22, %zmm23, %zmm21 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xc5,0xc1,0x36,0xee] + vpermq %zmm22, %zmm23, %zmm21 {%k1} {z} + +// CHECK: vpermq (%rcx), %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0x29] + vpermq (%rcx), %zmm23, %zmm21 + +// CHECK: vpermq 291(%rax,%r14,8), %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x36,0xac,0xf0,0x23,0x01,0x00,0x00] + vpermq 291(%rax,%r14,8), %zmm23, %zmm21 + +// CHECK: vpermq (%rcx){1to8}, %zmm23, %zmm21 +// CHECK: encoding: 
[0x62,0xe2,0xc5,0x50,0x36,0x29] + vpermq (%rcx){1to8}, %zmm23, %zmm21 + +// CHECK: vpermq 8128(%rdx), %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0x6a,0x7f] + vpermq 8128(%rdx), %zmm23, %zmm21 + +// CHECK: vpermq 8192(%rdx), %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0xaa,0x00,0x20,0x00,0x00] + vpermq 8192(%rdx), %zmm23, %zmm21 + +// CHECK: vpermq -8192(%rdx), %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0x6a,0x80] + vpermq -8192(%rdx), %zmm23, %zmm21 + +// CHECK: vpermq -8256(%rdx), %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x36,0xaa,0xc0,0xdf,0xff,0xff] + vpermq -8256(%rdx), %zmm23, %zmm21 + +// CHECK: vpermq 1016(%rdx){1to8}, %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0x6a,0x7f] + vpermq 1016(%rdx){1to8}, %zmm23, %zmm21 + +// CHECK: vpermq 1024(%rdx){1to8}, %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0xaa,0x00,0x04,0x00,0x00] + vpermq 1024(%rdx){1to8}, %zmm23, %zmm21 + +// CHECK: vpermq -1024(%rdx){1to8}, %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0x6a,0x80] + vpermq -1024(%rdx){1to8}, %zmm23, %zmm21 + +// CHECK: vpermq -1032(%rdx){1to8}, %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xc5,0x50,0x36,0xaa,0xf8,0xfb,0xff,0xff] + vpermq -1032(%rdx){1to8}, %zmm23, %zmm21 + +// CHECK: vpermpd %zmm18, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x22,0x95,0x40,0x16,0xd2] + vpermpd %zmm18, %zmm29, %zmm26 + +// CHECK: vpermpd %zmm18, %zmm29, %zmm26 {%k6} +// CHECK: encoding: [0x62,0x22,0x95,0x46,0x16,0xd2] + vpermpd %zmm18, %zmm29, %zmm26 {%k6} + +// CHECK: vpermpd %zmm18, %zmm29, %zmm26 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x95,0xc6,0x16,0xd2] + vpermpd %zmm18, %zmm29, %zmm26 {%k6} {z} + +// CHECK: vpermpd (%rcx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x11] + vpermpd (%rcx), %zmm29, %zmm26 + +// CHECK: vpermpd 291(%rax,%r14,8), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x22,0x95,0x40,0x16,0x94,0xf0,0x23,0x01,0x00,0x00] + 
vpermpd 291(%rax,%r14,8), %zmm29, %zmm26 + +// CHECK: vpermpd (%rcx){1to8}, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x11] + vpermpd (%rcx){1to8}, %zmm29, %zmm26 + +// CHECK: vpermpd 8128(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x52,0x7f] + vpermpd 8128(%rdx), %zmm29, %zmm26 + +// CHECK: vpermpd 8192(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x92,0x00,0x20,0x00,0x00] + vpermpd 8192(%rdx), %zmm29, %zmm26 + +// CHECK: vpermpd -8192(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x52,0x80] + vpermpd -8192(%rdx), %zmm29, %zmm26 + +// CHECK: vpermpd -8256(%rdx), %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x40,0x16,0x92,0xc0,0xdf,0xff,0xff] + vpermpd -8256(%rdx), %zmm29, %zmm26 + +// CHECK: vpermpd 1016(%rdx){1to8}, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x52,0x7f] + vpermpd 1016(%rdx){1to8}, %zmm29, %zmm26 + +// CHECK: vpermpd 1024(%rdx){1to8}, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x92,0x00,0x04,0x00,0x00] + vpermpd 1024(%rdx){1to8}, %zmm29, %zmm26 + +// CHECK: vpermpd -1024(%rdx){1to8}, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x52,0x80] + vpermpd -1024(%rdx){1to8}, %zmm29, %zmm26 + +// CHECK: vpermpd -1032(%rdx){1to8}, %zmm29, %zmm26 +// CHECK: encoding: [0x62,0x62,0x95,0x50,0x16,0x92,0xf8,0xfb,0xff,0xff] + vpermpd -1032(%rdx){1to8}, %zmm29, %zmm26 + +// CHECK: vpermpd $171, %zmm27, %zmm3 +// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x01,0xdb,0xab] + vpermpd $0xab, %zmm27, %zmm3 + +// CHECK: vpermpd $171, %zmm27, %zmm3 {%k2} +// CHECK: encoding: [0x62,0x93,0xfd,0x4a,0x01,0xdb,0xab] + vpermpd $0xab, %zmm27, %zmm3 {%k2} + +// CHECK: vpermpd $171, %zmm27, %zmm3 {%k2} {z} +// CHECK: encoding: [0x62,0x93,0xfd,0xca,0x01,0xdb,0xab] + vpermpd $0xab, %zmm27, %zmm3 {%k2} {z} + +// CHECK: vpermpd $123, %zmm27, %zmm3 +// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x01,0xdb,0x7b] + vpermpd $0x7b, %zmm27, %zmm3 + 
+// CHECK: vpermpd $123, (%rcx), %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x19,0x7b] + vpermpd $0x7b, (%rcx), %zmm3 + +// CHECK: vpermpd $123, 291(%rax,%r14,8), %zmm3 +// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x01,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermpd $0x7b, 291(%rax,%r14,8), %zmm3 + +// CHECK: vpermpd $123, (%rcx){1to8}, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x19,0x7b] + vpermpd $0x7b, (%rcx){1to8}, %zmm3 + +// CHECK: vpermpd $123, 8128(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x5a,0x7f,0x7b] + vpermpd $0x7b, 8128(%rdx), %zmm3 + +// CHECK: vpermpd $123, 8192(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x9a,0x00,0x20,0x00,0x00,0x7b] + vpermpd $0x7b, 8192(%rdx), %zmm3 + +// CHECK: vpermpd $123, -8192(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x5a,0x80,0x7b] + vpermpd $0x7b, -8192(%rdx), %zmm3 + +// CHECK: vpermpd $123, -8256(%rdx), %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x01,0x9a,0xc0,0xdf,0xff,0xff,0x7b] + vpermpd $0x7b, -8256(%rdx), %zmm3 + +// CHECK: vpermpd $123, 1016(%rdx){1to8}, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x5a,0x7f,0x7b] + vpermpd $0x7b, 1016(%rdx){1to8}, %zmm3 + +// CHECK: vpermpd $123, 1024(%rdx){1to8}, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x9a,0x00,0x04,0x00,0x00,0x7b] + vpermpd $0x7b, 1024(%rdx){1to8}, %zmm3 + +// CHECK: vpermpd $123, -1024(%rdx){1to8}, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x5a,0x80,0x7b] + vpermpd $0x7b, -1024(%rdx){1to8}, %zmm3 + +// CHECK: vpermpd $123, -1032(%rdx){1to8}, %zmm3 +// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x01,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vpermpd $0x7b, -1032(%rdx){1to8}, %zmm3 + +// CHECK: vpermd %zmm9, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x1d,0x40,0x36,0xf1] + vpermd %zmm9, %zmm28, %zmm22 + +// CHECK: vpermd %zmm9, %zmm28, %zmm22 {%k1} +// CHECK: encoding: [0x62,0xc2,0x1d,0x41,0x36,0xf1] + vpermd %zmm9, %zmm28, %zmm22 {%k1} + +// CHECK: vpermd %zmm9, 
%zmm28, %zmm22 {%k1} {z} +// CHECK: encoding: [0x62,0xc2,0x1d,0xc1,0x36,0xf1] + vpermd %zmm9, %zmm28, %zmm22 {%k1} {z} + +// CHECK: vpermd (%rcx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0x31] + vpermd (%rcx), %zmm28, %zmm22 + +// CHECK: vpermd 291(%rax,%r14,8), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x1d,0x40,0x36,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpermd 291(%rax,%r14,8), %zmm28, %zmm22 + +// CHECK: vpermd (%rcx){1to16}, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0x31] + vpermd (%rcx){1to16}, %zmm28, %zmm22 + +// CHECK: vpermd 8128(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0x72,0x7f] + vpermd 8128(%rdx), %zmm28, %zmm22 + +// CHECK: vpermd 8192(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0xb2,0x00,0x20,0x00,0x00] + vpermd 8192(%rdx), %zmm28, %zmm22 + +// CHECK: vpermd -8192(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0x72,0x80] + vpermd -8192(%rdx), %zmm28, %zmm22 + +// CHECK: vpermd -8256(%rdx), %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x40,0x36,0xb2,0xc0,0xdf,0xff,0xff] + vpermd -8256(%rdx), %zmm28, %zmm22 + +// CHECK: vpermd 508(%rdx){1to16}, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0x72,0x7f] + vpermd 508(%rdx){1to16}, %zmm28, %zmm22 + +// CHECK: vpermd 512(%rdx){1to16}, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0xb2,0x00,0x02,0x00,0x00] + vpermd 512(%rdx){1to16}, %zmm28, %zmm22 + +// CHECK: vpermd -512(%rdx){1to16}, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0x72,0x80] + vpermd -512(%rdx){1to16}, %zmm28, %zmm22 + +// CHECK: vpermd -516(%rdx){1to16}, %zmm28, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0xb2,0xfc,0xfd,0xff,0xff] + vpermd -516(%rdx){1to16}, %zmm28, %zmm22 + diff --git a/llvm/test/MC/X86/x86-64-avx512bw.s b/llvm/test/MC/X86/x86-64-avx512bw.s index ba043da66329..b81e3adffd25 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw.s +++ 
b/llvm/test/MC/X86/x86-64-avx512bw.s @@ -3308,3 +3308,38 @@ // CHECK: encoding: [0x62,0xe1,0x5d,0x40,0xd9,0xa2,0xc0,0xdf,0xff,0xff] vpsubusw -8256(%rdx), %zmm20, %zmm20 +// CHECK: vpermw %zmm21, %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xa2,0xe5,0x40,0x8d,0xf5] + vpermw %zmm21, %zmm19, %zmm22 + +// CHECK: vpermw %zmm21, %zmm19, %zmm22 {%k6} +// CHECK: encoding: [0x62,0xa2,0xe5,0x46,0x8d,0xf5] + vpermw %zmm21, %zmm19, %zmm22 {%k6} + +// CHECK: vpermw %zmm21, %zmm19, %zmm22 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0xe5,0xc6,0x8d,0xf5] + vpermw %zmm21, %zmm19, %zmm22 {%k6} {z} + +// CHECK: vpermw (%rcx), %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0x31] + vpermw (%rcx), %zmm19, %zmm22 + +// CHECK: vpermw 291(%rax,%r14,8), %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xa2,0xe5,0x40,0x8d,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpermw 291(%rax,%r14,8), %zmm19, %zmm22 + +// CHECK: vpermw 8128(%rdx), %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0x72,0x7f] + vpermw 8128(%rdx), %zmm19, %zmm22 + +// CHECK: vpermw 8192(%rdx), %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0xb2,0x00,0x20,0x00,0x00] + vpermw 8192(%rdx), %zmm19, %zmm22 + +// CHECK: vpermw -8192(%rdx), %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0x72,0x80] + vpermw -8192(%rdx), %zmm19, %zmm22 + +// CHECK: vpermw -8256(%rdx), %zmm19, %zmm22 +// CHECK: encoding: [0x62,0xe2,0xe5,0x40,0x8d,0xb2,0xc0,0xdf,0xff,0xff] + vpermw -8256(%rdx), %zmm19, %zmm22