[AVX-512] Expanded rsqrt/rcp instructions to VL subset.

Refactored the avx512_fp14_p multiclass on top of AVX512_maskable.

llvm-svn: 220783
This commit is contained in:
Robert Khasanov 2014-10-28 16:37:13 +00:00
parent 96261ddfa1
commit 3e534c93b9
3 changed files with 718 additions and 19 deletions

View File

@ -104,11 +104,15 @@ def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
// Descriptors for 256-bit vectors: element count, element type and the VR256X
// register class.
// NOTE(review): the last argument looks like the mnemonic suffix ("ps"/"pd"
// match the vrcp14ps/vrcp14pd names built from these records) -- confirm
// against the X86VectorVTInfo class definition.
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// Same layout for 128-bit vectors, using the VR128X register class.
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
X86VectorVTInfo i128> {
@ -4116,26 +4120,49 @@ def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// NOTE(review): this span shows two parameter lists and two bodies for the
// same multiclass. The commit message says the multiclass was refactored onto
// AVX512_maskable, so the RC/x86memop/mem_frag/OpVt signature with the plain
// `def r`/`def m` records is presumably the pre-refactor text, and the
// X86VectorVTInfo-based `defm r`/`m`/`mb` records the replacement -- confirm
// against the checked-in file before relying on either.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, ValueType OpVt> {
// Register-source form: applies OpNode to an RC register and writes RC:$dst.
def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr,
" \t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
EVEX;
// Memory-source form: the operand is loaded through mem_frag from addr:$src.
def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
EVEX;
X86VectorVTInfo _> {
// Register-source form, built through AVX512_maskable from the type
// descriptor `_` (register class and float type both come from it).
// NOTE(review): AVX512_maskable is defined outside this view; presumably it
// also emits the {%k} and {%k} {z} variants that the MC tests exercise.
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
// Both memory-source forms below are flagged mayLoad.
let mayLoad = 1 in {
// Full-vector memory operand: loaded with _.LdFrag and bitconverted to
// _.FloatVT before OpNode is applied.
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
// Broadcast memory operand: a scalar load (_.ScalarLdFrag) is replicated with
// X86VBroadcast; the assembly operand string gets _.BroadcastStr appended and
// the record is additionally tagged EVEX_B.
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, T8PD, EVEX_B;
}
}
// 512-bit instantiations that pass the register class, memory operand, load
// fragment and value type explicitly. Opcode 0x4E is used for rsqrt14 and 0x4C
// for rcp14; the pd variants add VEX_W and a 64-bit EVEX_CD8 element size.
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Instantiates avx512_fp14_p for every packed-float vector width of one
// operation. OpcodeStr is the mnemonic stem; "ps" or "pd" is appended per
// element type, and each instantiation picks the matching X86VectorVTInfo
// record, EVEX vector-length modifier and EVEX_CD8 element size (pd variants
// also add VEX_W).
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
// 512-bit forms: no additional predicate is applied inside this multiclass.
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
// 128-bit and 256-bit single-precision forms.
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
// 128-bit and 256-bit double-precision forms.
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
// All-width instantiations: vrsqrt14* on opcode 0x4E selects X86frsqrt,
// vrcp14* on opcode 0x4C selects X86frcp.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),

View File

@ -3193,6 +3193,230 @@
// CHECK: encoding: [0x62,0xf1,0xcd,0x50,0xef,0xba,0xf8,0xfb,0xff,0xff]
vpxorq -1032(%rdx){1to8}, %zmm22, %zmm7
// vrcp14pd, zmm forms: register source unmasked, masked ({%k5}) and
// zero-masked ({%k5} {z}), then full-vector and {1to8} broadcast memory
// sources. The displacement cases bracket the one-byte range seen in the
// expected encodings: 8128/-8192 (full vector) and 1016/-1024 (broadcast)
// fit in a single displacement byte; 8192/-8256 and 1024/-1032 take four.
// CHECK: vrcp14pd %zmm4, %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0xec]
vrcp14pd %zmm4, %zmm13
// CHECK: vrcp14pd %zmm4, %zmm13 {%k5}
// CHECK: encoding: [0x62,0x72,0xfd,0x4d,0x4c,0xec]
vrcp14pd %zmm4, %zmm13 {%k5}
// CHECK: vrcp14pd %zmm4, %zmm13 {%k5} {z}
// CHECK: encoding: [0x62,0x72,0xfd,0xcd,0x4c,0xec]
vrcp14pd %zmm4, %zmm13 {%k5} {z}
// CHECK: vrcp14pd (%rcx), %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0x29]
vrcp14pd (%rcx), %zmm13
// CHECK: vrcp14pd 291(%rax,%r14,8), %zmm13
// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x4c,0xac,0xf0,0x23,0x01,0x00,0x00]
vrcp14pd 291(%rax,%r14,8), %zmm13
// CHECK: vrcp14pd (%rcx){1to8}, %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0x29]
vrcp14pd (%rcx){1to8}, %zmm13
// CHECK: vrcp14pd 8128(%rdx), %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0x6a,0x7f]
vrcp14pd 8128(%rdx), %zmm13
// CHECK: vrcp14pd 8192(%rdx), %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0xaa,0x00,0x20,0x00,0x00]
vrcp14pd 8192(%rdx), %zmm13
// CHECK: vrcp14pd -8192(%rdx), %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0x6a,0x80]
vrcp14pd -8192(%rdx), %zmm13
// CHECK: vrcp14pd -8256(%rdx), %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x4c,0xaa,0xc0,0xdf,0xff,0xff]
vrcp14pd -8256(%rdx), %zmm13
// CHECK: vrcp14pd 1016(%rdx){1to8}, %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0x6a,0x7f]
vrcp14pd 1016(%rdx){1to8}, %zmm13
// CHECK: vrcp14pd 1024(%rdx){1to8}, %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0xaa,0x00,0x04,0x00,0x00]
vrcp14pd 1024(%rdx){1to8}, %zmm13
// CHECK: vrcp14pd -1024(%rdx){1to8}, %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0x6a,0x80]
vrcp14pd -1024(%rdx){1to8}, %zmm13
// CHECK: vrcp14pd -1032(%rdx){1to8}, %zmm13
// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x4c,0xaa,0xf8,0xfb,0xff,0xff]
vrcp14pd -1032(%rdx){1to8}, %zmm13
// vrcp14ps, zmm forms: register source unmasked, masked ({%k1}) and
// zero-masked ({%k1} {z}), then full-vector and {1to16} broadcast memory
// sources. Per the expected encodings, 8128/-8192 (full vector) and 508/-512
// (broadcast) fit in a single displacement byte; 8192/-8256 and 512/-516
// take four.
// CHECK: vrcp14ps %zmm25, %zmm10
// CHECK: encoding: [0x62,0x12,0x7d,0x48,0x4c,0xd1]
vrcp14ps %zmm25, %zmm10
// CHECK: vrcp14ps %zmm25, %zmm10 {%k1}
// CHECK: encoding: [0x62,0x12,0x7d,0x49,0x4c,0xd1]
vrcp14ps %zmm25, %zmm10 {%k1}
// CHECK: vrcp14ps %zmm25, %zmm10 {%k1} {z}
// CHECK: encoding: [0x62,0x12,0x7d,0xc9,0x4c,0xd1]
vrcp14ps %zmm25, %zmm10 {%k1} {z}
// CHECK: vrcp14ps (%rcx), %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x11]
vrcp14ps (%rcx), %zmm10
// CHECK: vrcp14ps 291(%rax,%r14,8), %zmm10
// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x4c,0x94,0xf0,0x23,0x01,0x00,0x00]
vrcp14ps 291(%rax,%r14,8), %zmm10
// CHECK: vrcp14ps (%rcx){1to16}, %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x11]
vrcp14ps (%rcx){1to16}, %zmm10
// CHECK: vrcp14ps 8128(%rdx), %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x52,0x7f]
vrcp14ps 8128(%rdx), %zmm10
// CHECK: vrcp14ps 8192(%rdx), %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x92,0x00,0x20,0x00,0x00]
vrcp14ps 8192(%rdx), %zmm10
// CHECK: vrcp14ps -8192(%rdx), %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x52,0x80]
vrcp14ps -8192(%rdx), %zmm10
// CHECK: vrcp14ps -8256(%rdx), %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x4c,0x92,0xc0,0xdf,0xff,0xff]
vrcp14ps -8256(%rdx), %zmm10
// CHECK: vrcp14ps 508(%rdx){1to16}, %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x52,0x7f]
vrcp14ps 508(%rdx){1to16}, %zmm10
// CHECK: vrcp14ps 512(%rdx){1to16}, %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x92,0x00,0x02,0x00,0x00]
vrcp14ps 512(%rdx){1to16}, %zmm10
// CHECK: vrcp14ps -512(%rdx){1to16}, %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x52,0x80]
vrcp14ps -512(%rdx){1to16}, %zmm10
// CHECK: vrcp14ps -516(%rdx){1to16}, %zmm10
// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x4c,0x92,0xfc,0xfd,0xff,0xff]
vrcp14ps -516(%rdx){1to16}, %zmm10
// vrsqrt14pd, zmm forms: register source unmasked, masked ({%k1}) and
// zero-masked ({%k1} {z}), then full-vector and {1to8} broadcast memory
// sources. Per the expected encodings, 8128/-8192 (full vector) and
// 1016/-1024 (broadcast) fit in a single displacement byte; 8192/-8256 and
// 1024/-1032 take four.
// CHECK: vrsqrt14pd %zmm14, %zmm19
// CHECK: encoding: [0x62,0xc2,0xfd,0x48,0x4e,0xde]
vrsqrt14pd %zmm14, %zmm19
// CHECK: vrsqrt14pd %zmm14, %zmm19 {%k1}
// CHECK: encoding: [0x62,0xc2,0xfd,0x49,0x4e,0xde]
vrsqrt14pd %zmm14, %zmm19 {%k1}
// CHECK: vrsqrt14pd %zmm14, %zmm19 {%k1} {z}
// CHECK: encoding: [0x62,0xc2,0xfd,0xc9,0x4e,0xde]
vrsqrt14pd %zmm14, %zmm19 {%k1} {z}
// CHECK: vrsqrt14pd (%rcx), %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x19]
vrsqrt14pd (%rcx), %zmm19
// CHECK: vrsqrt14pd 291(%rax,%r14,8), %zmm19
// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00]
vrsqrt14pd 291(%rax,%r14,8), %zmm19
// CHECK: vrsqrt14pd (%rcx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x19]
vrsqrt14pd (%rcx){1to8}, %zmm19
// CHECK: vrsqrt14pd 8128(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x5a,0x7f]
vrsqrt14pd 8128(%rdx), %zmm19
// CHECK: vrsqrt14pd 8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x9a,0x00,0x20,0x00,0x00]
vrsqrt14pd 8192(%rdx), %zmm19
// CHECK: vrsqrt14pd -8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x5a,0x80]
vrsqrt14pd -8192(%rdx), %zmm19
// CHECK: vrsqrt14pd -8256(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x9a,0xc0,0xdf,0xff,0xff]
vrsqrt14pd -8256(%rdx), %zmm19
// CHECK: vrsqrt14pd 1016(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x5a,0x7f]
vrsqrt14pd 1016(%rdx){1to8}, %zmm19
// CHECK: vrsqrt14pd 1024(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x9a,0x00,0x04,0x00,0x00]
vrsqrt14pd 1024(%rdx){1to8}, %zmm19
// CHECK: vrsqrt14pd -1024(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x5a,0x80]
vrsqrt14pd -1024(%rdx){1to8}, %zmm19
// CHECK: vrsqrt14pd -1032(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x9a,0xf8,0xfb,0xff,0xff]
vrsqrt14pd -1032(%rdx){1to8}, %zmm19
// vrsqrt14ps, zmm forms: register source unmasked, masked ({%k5}) and
// zero-masked ({%k5} {z}), then full-vector and {1to16} broadcast memory
// sources. Per the expected encodings, 8128/-8192 (full vector) and 508/-512
// (broadcast) fit in a single displacement byte; 8192/-8256 and 512/-516
// take four.
// CHECK: vrsqrt14ps %zmm9, %zmm16
// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x4e,0xc1]
vrsqrt14ps %zmm9, %zmm16
// CHECK: vrsqrt14ps %zmm9, %zmm16 {%k5}
// CHECK: encoding: [0x62,0xc2,0x7d,0x4d,0x4e,0xc1]
vrsqrt14ps %zmm9, %zmm16 {%k5}
// CHECK: vrsqrt14ps %zmm9, %zmm16 {%k5} {z}
// CHECK: encoding: [0x62,0xc2,0x7d,0xcd,0x4e,0xc1]
vrsqrt14ps %zmm9, %zmm16 {%k5} {z}
// CHECK: vrsqrt14ps (%rcx), %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x01]
vrsqrt14ps (%rcx), %zmm16
// CHECK: vrsqrt14ps 291(%rax,%r14,8), %zmm16
// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x4e,0x84,0xf0,0x23,0x01,0x00,0x00]
vrsqrt14ps 291(%rax,%r14,8), %zmm16
// CHECK: vrsqrt14ps (%rcx){1to16}, %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x01]
vrsqrt14ps (%rcx){1to16}, %zmm16
// CHECK: vrsqrt14ps 8128(%rdx), %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x42,0x7f]
vrsqrt14ps 8128(%rdx), %zmm16
// CHECK: vrsqrt14ps 8192(%rdx), %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x82,0x00,0x20,0x00,0x00]
vrsqrt14ps 8192(%rdx), %zmm16
// CHECK: vrsqrt14ps -8192(%rdx), %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x42,0x80]
vrsqrt14ps -8192(%rdx), %zmm16
// CHECK: vrsqrt14ps -8256(%rdx), %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x82,0xc0,0xdf,0xff,0xff]
vrsqrt14ps -8256(%rdx), %zmm16
// CHECK: vrsqrt14ps 508(%rdx){1to16}, %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x42,0x7f]
vrsqrt14ps 508(%rdx){1to16}, %zmm16
// CHECK: vrsqrt14ps 512(%rdx){1to16}, %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x82,0x00,0x02,0x00,0x00]
vrsqrt14ps 512(%rdx){1to16}, %zmm16
// CHECK: vrsqrt14ps -512(%rdx){1to16}, %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x42,0x80]
vrsqrt14ps -512(%rdx){1to16}, %zmm16
// CHECK: vrsqrt14ps -516(%rdx){1to16}, %zmm16
// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x82,0xfc,0xfd,0xff,0xff]
vrsqrt14ps -516(%rdx){1to16}, %zmm16
// CHECK: vsubpd %zmm9, %zmm12, %zmm9
// CHECK: encoding: [0x62,0x51,0x9d,0x48,0x5c,0xc9]
vsubpd %zmm9, %zmm12, %zmm9

View File

@ -3984,6 +3984,454 @@
// CHECK: encoding: [0x62,0xe1,0xed,0x30,0xef,0xaa,0xf8,0xfb,0xff,0xff]
vpxorq -1032(%rdx){1to4}, %ymm18, %ymm21
// vrcp14pd, xmm forms: register source unmasked, masked ({%k4}) and
// zero-masked ({%k4} {z}), then full-vector and {1to2} broadcast memory
// sources. Per the expected encodings, 2032/-2048 (full vector) and
// 1016/-1024 (broadcast) fit in a single displacement byte; 2048/-2064 and
// 1024/-1032 take four.
// CHECK: vrcp14pd %xmm29, %xmm18
// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x4c,0xd5]
vrcp14pd %xmm29, %xmm18
// CHECK: vrcp14pd %xmm29, %xmm18 {%k4}
// CHECK: encoding: [0x62,0x82,0xfd,0x0c,0x4c,0xd5]
vrcp14pd %xmm29, %xmm18 {%k4}
// CHECK: vrcp14pd %xmm29, %xmm18 {%k4} {z}
// CHECK: encoding: [0x62,0x82,0xfd,0x8c,0x4c,0xd5]
vrcp14pd %xmm29, %xmm18 {%k4} {z}
// CHECK: vrcp14pd (%rcx), %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x11]
vrcp14pd (%rcx), %xmm18
// CHECK: vrcp14pd 291(%rax,%r14,8), %xmm18
// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x4c,0x94,0xf0,0x23,0x01,0x00,0x00]
vrcp14pd 291(%rax,%r14,8), %xmm18
// CHECK: vrcp14pd (%rcx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x11]
vrcp14pd (%rcx){1to2}, %xmm18
// CHECK: vrcp14pd 2032(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x52,0x7f]
vrcp14pd 2032(%rdx), %xmm18
// CHECK: vrcp14pd 2048(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x92,0x00,0x08,0x00,0x00]
vrcp14pd 2048(%rdx), %xmm18
// CHECK: vrcp14pd -2048(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x52,0x80]
vrcp14pd -2048(%rdx), %xmm18
// CHECK: vrcp14pd -2064(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x92,0xf0,0xf7,0xff,0xff]
vrcp14pd -2064(%rdx), %xmm18
// CHECK: vrcp14pd 1016(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x52,0x7f]
vrcp14pd 1016(%rdx){1to2}, %xmm18
// CHECK: vrcp14pd 1024(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x92,0x00,0x04,0x00,0x00]
vrcp14pd 1024(%rdx){1to2}, %xmm18
// CHECK: vrcp14pd -1024(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x52,0x80]
vrcp14pd -1024(%rdx){1to2}, %xmm18
// CHECK: vrcp14pd -1032(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x92,0xf8,0xfb,0xff,0xff]
vrcp14pd -1032(%rdx){1to2}, %xmm18
// vrcp14pd, ymm forms: register source unmasked, masked ({%k4}) and
// zero-masked ({%k4} {z}), then full-vector and {1to4} broadcast memory
// sources. Per the expected encodings, 4064/-4096 (full vector) and
// 1016/-1024 (broadcast) fit in a single displacement byte; 4096/-4128 and
// 1024/-1032 take four.
// CHECK: vrcp14pd %ymm29, %ymm17
// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x4c,0xcd]
vrcp14pd %ymm29, %ymm17
// CHECK: vrcp14pd %ymm29, %ymm17 {%k4}
// CHECK: encoding: [0x62,0x82,0xfd,0x2c,0x4c,0xcd]
vrcp14pd %ymm29, %ymm17 {%k4}
// CHECK: vrcp14pd %ymm29, %ymm17 {%k4} {z}
// CHECK: encoding: [0x62,0x82,0xfd,0xac,0x4c,0xcd]
vrcp14pd %ymm29, %ymm17 {%k4} {z}
// CHECK: vrcp14pd (%rcx), %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x09]
vrcp14pd (%rcx), %ymm17
// CHECK: vrcp14pd 291(%rax,%r14,8), %ymm17
// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x4c,0x8c,0xf0,0x23,0x01,0x00,0x00]
vrcp14pd 291(%rax,%r14,8), %ymm17
// CHECK: vrcp14pd (%rcx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x09]
vrcp14pd (%rcx){1to4}, %ymm17
// CHECK: vrcp14pd 4064(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x4a,0x7f]
vrcp14pd 4064(%rdx), %ymm17
// CHECK: vrcp14pd 4096(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x8a,0x00,0x10,0x00,0x00]
vrcp14pd 4096(%rdx), %ymm17
// CHECK: vrcp14pd -4096(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x4a,0x80]
vrcp14pd -4096(%rdx), %ymm17
// CHECK: vrcp14pd -4128(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x8a,0xe0,0xef,0xff,0xff]
vrcp14pd -4128(%rdx), %ymm17
// CHECK: vrcp14pd 1016(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x4a,0x7f]
vrcp14pd 1016(%rdx){1to4}, %ymm17
// CHECK: vrcp14pd 1024(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x8a,0x00,0x04,0x00,0x00]
vrcp14pd 1024(%rdx){1to4}, %ymm17
// CHECK: vrcp14pd -1024(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x4a,0x80]
vrcp14pd -1024(%rdx){1to4}, %ymm17
// CHECK: vrcp14pd -1032(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x8a,0xf8,0xfb,0xff,0xff]
vrcp14pd -1032(%rdx){1to4}, %ymm17
// vrcp14ps, xmm forms: register source unmasked, masked ({%k4}) and
// zero-masked ({%k4} {z}), then full-vector and {1to4} broadcast memory
// sources. Per the expected encodings, 2032/-2048 (full vector) and 508/-512
// (broadcast) fit in a single displacement byte; 2048/-2064 and 512/-516
// take four.
// CHECK: vrcp14ps %xmm28, %xmm27
// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x4c,0xdc]
vrcp14ps %xmm28, %xmm27
// CHECK: vrcp14ps %xmm28, %xmm27 {%k4}
// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x4c,0xdc]
vrcp14ps %xmm28, %xmm27 {%k4}
// CHECK: vrcp14ps %xmm28, %xmm27 {%k4} {z}
// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x4c,0xdc]
vrcp14ps %xmm28, %xmm27 {%k4} {z}
// CHECK: vrcp14ps (%rcx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x19]
vrcp14ps (%rcx), %xmm27
// CHECK: vrcp14ps 291(%rax,%r14,8), %xmm27
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x4c,0x9c,0xf0,0x23,0x01,0x00,0x00]
vrcp14ps 291(%rax,%r14,8), %xmm27
// CHECK: vrcp14ps (%rcx){1to4}, %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x19]
vrcp14ps (%rcx){1to4}, %xmm27
// CHECK: vrcp14ps 2032(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x5a,0x7f]
vrcp14ps 2032(%rdx), %xmm27
// CHECK: vrcp14ps 2048(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x9a,0x00,0x08,0x00,0x00]
vrcp14ps 2048(%rdx), %xmm27
// CHECK: vrcp14ps -2048(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x5a,0x80]
vrcp14ps -2048(%rdx), %xmm27
// CHECK: vrcp14ps -2064(%rdx), %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x4c,0x9a,0xf0,0xf7,0xff,0xff]
vrcp14ps -2064(%rdx), %xmm27
// CHECK: vrcp14ps 508(%rdx){1to4}, %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x5a,0x7f]
vrcp14ps 508(%rdx){1to4}, %xmm27
// CHECK: vrcp14ps 512(%rdx){1to4}, %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x9a,0x00,0x02,0x00,0x00]
vrcp14ps 512(%rdx){1to4}, %xmm27
// CHECK: vrcp14ps -512(%rdx){1to4}, %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x5a,0x80]
vrcp14ps -512(%rdx){1to4}, %xmm27
// CHECK: vrcp14ps -516(%rdx){1to4}, %xmm27
// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x4c,0x9a,0xfc,0xfd,0xff,0xff]
vrcp14ps -516(%rdx){1to4}, %xmm27
// vrcp14ps, ymm forms: register source unmasked, masked ({%k7}) and
// zero-masked ({%k7} {z}), then full-vector and {1to8} broadcast memory
// sources. Per the expected encodings, 4064/-4096 (full vector) and 508/-512
// (broadcast) fit in a single displacement byte; 4096/-4128 and 512/-516
// take four.
// CHECK: vrcp14ps %ymm21, %ymm29
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4c,0xed]
vrcp14ps %ymm21, %ymm29
// CHECK: vrcp14ps %ymm21, %ymm29 {%k7}
// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x4c,0xed]
vrcp14ps %ymm21, %ymm29 {%k7}
// CHECK: vrcp14ps %ymm21, %ymm29 {%k7} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x4c,0xed]
vrcp14ps %ymm21, %ymm29 {%k7} {z}
// CHECK: vrcp14ps (%rcx), %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0x29]
vrcp14ps (%rcx), %ymm29
// CHECK: vrcp14ps 291(%rax,%r14,8), %ymm29
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4c,0xac,0xf0,0x23,0x01,0x00,0x00]
vrcp14ps 291(%rax,%r14,8), %ymm29
// CHECK: vrcp14ps (%rcx){1to8}, %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0x29]
vrcp14ps (%rcx){1to8}, %ymm29
// CHECK: vrcp14ps 4064(%rdx), %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0x6a,0x7f]
vrcp14ps 4064(%rdx), %ymm29
// CHECK: vrcp14ps 4096(%rdx), %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0xaa,0x00,0x10,0x00,0x00]
vrcp14ps 4096(%rdx), %ymm29
// CHECK: vrcp14ps -4096(%rdx), %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0x6a,0x80]
vrcp14ps -4096(%rdx), %ymm29
// CHECK: vrcp14ps -4128(%rdx), %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4c,0xaa,0xe0,0xef,0xff,0xff]
vrcp14ps -4128(%rdx), %ymm29
// CHECK: vrcp14ps 508(%rdx){1to8}, %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0x6a,0x7f]
vrcp14ps 508(%rdx){1to8}, %ymm29
// CHECK: vrcp14ps 512(%rdx){1to8}, %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0xaa,0x00,0x02,0x00,0x00]
vrcp14ps 512(%rdx){1to8}, %ymm29
// CHECK: vrcp14ps -512(%rdx){1to8}, %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0x6a,0x80]
vrcp14ps -512(%rdx){1to8}, %ymm29
// CHECK: vrcp14ps -516(%rdx){1to8}, %ymm29
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4c,0xaa,0xfc,0xfd,0xff,0xff]
vrcp14ps -516(%rdx){1to8}, %ymm29
// vrsqrt14pd, xmm forms: register source unmasked, masked ({%k1}) and
// zero-masked ({%k1} {z}), then full-vector and {1to2} broadcast memory
// sources. Per the expected encodings, 2032/-2048 (full vector) and
// 1016/-1024 (broadcast) fit in a single displacement byte; 2048/-2064 and
// 1024/-1032 take four.
// CHECK: vrsqrt14pd %xmm28, %xmm21
// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x4e,0xec]
vrsqrt14pd %xmm28, %xmm21
// CHECK: vrsqrt14pd %xmm28, %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x09,0x4e,0xec]
vrsqrt14pd %xmm28, %xmm21 {%k1}
// CHECK: vrsqrt14pd %xmm28, %xmm21 {%k1} {z}
// CHECK: encoding: [0x62,0x82,0xfd,0x89,0x4e,0xec]
vrsqrt14pd %xmm28, %xmm21 {%k1} {z}
// CHECK: vrsqrt14pd (%rcx), %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x29]
vrsqrt14pd (%rcx), %xmm21
// CHECK: vrsqrt14pd 291(%rax,%r14,8), %xmm21
// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x4e,0xac,0xf0,0x23,0x01,0x00,0x00]
vrsqrt14pd 291(%rax,%r14,8), %xmm21
// CHECK: vrsqrt14pd (%rcx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x29]
vrsqrt14pd (%rcx){1to2}, %xmm21
// CHECK: vrsqrt14pd 2032(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x6a,0x7f]
vrsqrt14pd 2032(%rdx), %xmm21
// CHECK: vrsqrt14pd 2048(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0xaa,0x00,0x08,0x00,0x00]
vrsqrt14pd 2048(%rdx), %xmm21
// CHECK: vrsqrt14pd -2048(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x6a,0x80]
vrsqrt14pd -2048(%rdx), %xmm21
// CHECK: vrsqrt14pd -2064(%rdx), %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x4e,0xaa,0xf0,0xf7,0xff,0xff]
vrsqrt14pd -2064(%rdx), %xmm21
// CHECK: vrsqrt14pd 1016(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x6a,0x7f]
vrsqrt14pd 1016(%rdx){1to2}, %xmm21
// CHECK: vrsqrt14pd 1024(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0xaa,0x00,0x04,0x00,0x00]
vrsqrt14pd 1024(%rdx){1to2}, %xmm21
// CHECK: vrsqrt14pd -1024(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x6a,0x80]
vrsqrt14pd -1024(%rdx){1to2}, %xmm21
// CHECK: vrsqrt14pd -1032(%rdx){1to2}, %xmm21
// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x4e,0xaa,0xf8,0xfb,0xff,0xff]
vrsqrt14pd -1032(%rdx){1to2}, %xmm21
// vrsqrt14pd, ymm forms: register source unmasked, masked ({%k4}) and
// zero-masked ({%k4} {z}), then full-vector and {1to4} broadcast memory
// sources. Per the expected encodings, 4064/-4096 (full vector) and
// 1016/-1024 (broadcast) fit in a single displacement byte; 4096/-4128 and
// 1024/-1032 take four.
// CHECK: vrsqrt14pd %ymm19, %ymm18
// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x4e,0xd3]
vrsqrt14pd %ymm19, %ymm18
// CHECK: vrsqrt14pd %ymm19, %ymm18 {%k4}
// CHECK: encoding: [0x62,0xa2,0xfd,0x2c,0x4e,0xd3]
vrsqrt14pd %ymm19, %ymm18 {%k4}
// CHECK: vrsqrt14pd %ymm19, %ymm18 {%k4} {z}
// CHECK: encoding: [0x62,0xa2,0xfd,0xac,0x4e,0xd3]
vrsqrt14pd %ymm19, %ymm18 {%k4} {z}
// CHECK: vrsqrt14pd (%rcx), %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x11]
vrsqrt14pd (%rcx), %ymm18
// CHECK: vrsqrt14pd 291(%rax,%r14,8), %ymm18
// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x4e,0x94,0xf0,0x23,0x01,0x00,0x00]
vrsqrt14pd 291(%rax,%r14,8), %ymm18
// CHECK: vrsqrt14pd (%rcx){1to4}, %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x11]
vrsqrt14pd (%rcx){1to4}, %ymm18
// CHECK: vrsqrt14pd 4064(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x52,0x7f]
vrsqrt14pd 4064(%rdx), %ymm18
// CHECK: vrsqrt14pd 4096(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x92,0x00,0x10,0x00,0x00]
vrsqrt14pd 4096(%rdx), %ymm18
// CHECK: vrsqrt14pd -4096(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x52,0x80]
vrsqrt14pd -4096(%rdx), %ymm18
// CHECK: vrsqrt14pd -4128(%rdx), %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x92,0xe0,0xef,0xff,0xff]
vrsqrt14pd -4128(%rdx), %ymm18
// CHECK: vrsqrt14pd 1016(%rdx){1to4}, %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x52,0x7f]
vrsqrt14pd 1016(%rdx){1to4}, %ymm18
// CHECK: vrsqrt14pd 1024(%rdx){1to4}, %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x92,0x00,0x04,0x00,0x00]
vrsqrt14pd 1024(%rdx){1to4}, %ymm18
// CHECK: vrsqrt14pd -1024(%rdx){1to4}, %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x52,0x80]
vrsqrt14pd -1024(%rdx){1to4}, %ymm18
// CHECK: vrsqrt14pd -1032(%rdx){1to4}, %ymm18
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x92,0xf8,0xfb,0xff,0xff]
vrsqrt14pd -1032(%rdx){1to4}, %ymm18
// vrsqrt14ps, xmm forms: register source unmasked, masked ({%k7}) and
// zero-masked ({%k7} {z}), then full-vector and {1to4} broadcast memory
// sources. Per the expected encodings, 2032/-2048 (full vector) and 508/-512
// (broadcast) fit in a single displacement byte; 2048/-2064 and 512/-516
// take four.
// CHECK: vrsqrt14ps %xmm20, %xmm19
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x4e,0xdc]
vrsqrt14ps %xmm20, %xmm19
// CHECK: vrsqrt14ps %xmm20, %xmm19 {%k7}
// CHECK: encoding: [0x62,0xa2,0x7d,0x0f,0x4e,0xdc]
vrsqrt14ps %xmm20, %xmm19 {%k7}
// CHECK: vrsqrt14ps %xmm20, %xmm19 {%k7} {z}
// CHECK: encoding: [0x62,0xa2,0x7d,0x8f,0x4e,0xdc]
vrsqrt14ps %xmm20, %xmm19 {%k7} {z}
// CHECK: vrsqrt14ps (%rcx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x19]
vrsqrt14ps (%rcx), %xmm19
// CHECK: vrsqrt14ps 291(%rax,%r14,8), %xmm19
// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00]
vrsqrt14ps 291(%rax,%r14,8), %xmm19
// CHECK: vrsqrt14ps (%rcx){1to4}, %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x19]
vrsqrt14ps (%rcx){1to4}, %xmm19
// CHECK: vrsqrt14ps 2032(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x5a,0x7f]
vrsqrt14ps 2032(%rdx), %xmm19
// CHECK: vrsqrt14ps 2048(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x9a,0x00,0x08,0x00,0x00]
vrsqrt14ps 2048(%rdx), %xmm19
// CHECK: vrsqrt14ps -2048(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x5a,0x80]
vrsqrt14ps -2048(%rdx), %xmm19
// CHECK: vrsqrt14ps -2064(%rdx), %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x9a,0xf0,0xf7,0xff,0xff]
vrsqrt14ps -2064(%rdx), %xmm19
// CHECK: vrsqrt14ps 508(%rdx){1to4}, %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x5a,0x7f]
vrsqrt14ps 508(%rdx){1to4}, %xmm19
// CHECK: vrsqrt14ps 512(%rdx){1to4}, %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x9a,0x00,0x02,0x00,0x00]
vrsqrt14ps 512(%rdx){1to4}, %xmm19
// CHECK: vrsqrt14ps -512(%rdx){1to4}, %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x5a,0x80]
vrsqrt14ps -512(%rdx){1to4}, %xmm19
// CHECK: vrsqrt14ps -516(%rdx){1to4}, %xmm19
// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x9a,0xfc,0xfd,0xff,0xff]
vrsqrt14ps -516(%rdx){1to4}, %xmm19
// vrsqrt14ps, ymm forms: register source unmasked, masked ({%k7}) and
// zero-masked ({%k7} {z}), then full-vector and {1to8} broadcast memory
// sources. Per the expected encodings, 4064/-4096 (full vector) and 508/-512
// (broadcast) fit in a single displacement byte; 4096/-4128 and 512/-516
// take four.
// CHECK: vrsqrt14ps %ymm18, %ymm27
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4e,0xda]
vrsqrt14ps %ymm18, %ymm27
// CHECK: vrsqrt14ps %ymm18, %ymm27 {%k7}
// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x4e,0xda]
vrsqrt14ps %ymm18, %ymm27 {%k7}
// CHECK: vrsqrt14ps %ymm18, %ymm27 {%k7} {z}
// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x4e,0xda]
vrsqrt14ps %ymm18, %ymm27 {%k7} {z}
// CHECK: vrsqrt14ps (%rcx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x19]
vrsqrt14ps (%rcx), %ymm27
// CHECK: vrsqrt14ps 291(%rax,%r14,8), %ymm27
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00]
vrsqrt14ps 291(%rax,%r14,8), %ymm27
// CHECK: vrsqrt14ps (%rcx){1to8}, %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x19]
vrsqrt14ps (%rcx){1to8}, %ymm27
// CHECK: vrsqrt14ps 4064(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x5a,0x7f]
vrsqrt14ps 4064(%rdx), %ymm27
// CHECK: vrsqrt14ps 4096(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x9a,0x00,0x10,0x00,0x00]
vrsqrt14ps 4096(%rdx), %ymm27
// CHECK: vrsqrt14ps -4096(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x5a,0x80]
vrsqrt14ps -4096(%rdx), %ymm27
// CHECK: vrsqrt14ps -4128(%rdx), %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x4e,0x9a,0xe0,0xef,0xff,0xff]
vrsqrt14ps -4128(%rdx), %ymm27
// CHECK: vrsqrt14ps 508(%rdx){1to8}, %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x5a,0x7f]
vrsqrt14ps 508(%rdx){1to8}, %ymm27
// CHECK: vrsqrt14ps 512(%rdx){1to8}, %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x9a,0x00,0x02,0x00,0x00]
vrsqrt14ps 512(%rdx){1to8}, %ymm27
// CHECK: vrsqrt14ps -512(%rdx){1to8}, %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x5a,0x80]
vrsqrt14ps -512(%rdx){1to8}, %ymm27
// CHECK: vrsqrt14ps -516(%rdx){1to8}, %ymm27
// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x4e,0x9a,0xfc,0xfd,0xff,0xff]
vrsqrt14ps -516(%rdx){1to8}, %ymm27
// CHECK: vmovapd %xmm22, (%rcx)
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x29,0x31]
vmovapd %xmm22, (%rcx)