forked from OSchip/llvm-project
[SKX] Extended non-temporal load/store instructions for AVX512VL subsets.
Added the avx512_movnt_vl multiclass to handle the 256-bit and 128-bit forms of the instructions. Added encoding and lowering tests. Reviewed by Elena Demikhovsky <elena.demikhovsky@intel.com>. llvm-svn: 215536
This commit is contained in:
parent
d97a634f12
commit
ed8829703f
|
@ -1954,8 +1954,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
llvm_i32_ty], [IntrNoMem, Commutative]>;
|
llvm_i32_ty], [IntrNoMem, Commutative]>;
|
||||||
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
|
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
|
||||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||||
def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
|
|
||||||
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -3219,6 +3217,8 @@ let TargetPrefix = "x86" in {
|
||||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||||
llvm_v8i64_ty, llvm_i8_ty],
|
llvm_v8i64_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
|
||||||
|
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -2090,43 +2090,73 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AVX-512 - Non-temporals
|
// AVX-512 - Non-temporals
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
let SchedRW = [WriteLoad] in {
|
||||||
|
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
|
||||||
|
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
|
||||||
|
[(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
|
||||||
|
SSEPackedInt>, EVEX, T8PD, EVEX_V512,
|
||||||
|
EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
|
let Predicates = [HasAVX512, HasVLX] in {
|
||||||
(ins i512mem:$src),
|
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
|
||||||
"vmovntdqa\t{$src, $dst|$dst, $src}",
|
(ins i256mem:$src),
|
||||||
[(set VR512:$dst,
|
"vmovntdqa\t{$src, $dst|$dst, $src}", [],
|
||||||
(int_x86_avx512_movntdqa addr:$src))]>,
|
SSEPackedInt>, EVEX, T8PD, EVEX_V256,
|
||||||
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
// Prefer non-temporal over temporal versions
|
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
|
||||||
let AddedComplexity = 400, SchedRW = [WriteStore] in {
|
(ins i128mem:$src),
|
||||||
|
"vmovntdqa\t{$src, $dst|$dst, $src}", [],
|
||||||
def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
|
SSEPackedInt>, EVEX, T8PD, EVEX_V128,
|
||||||
(ins f512mem:$dst, VR512:$src),
|
EVEX_CD8<64, CD8VF>;
|
||||||
"vmovntps\t{$src, $dst|$dst, $src}",
|
}
|
||||||
[(alignednontemporalstore (v16f32 VR512:$src),
|
|
||||||
addr:$dst)],
|
|
||||||
IIC_SSE_MOVNT>,
|
|
||||||
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
|
||||||
|
|
||||||
def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
|
|
||||||
(ins f512mem:$dst, VR512:$src),
|
|
||||||
"vmovntpd\t{$src, $dst|$dst, $src}",
|
|
||||||
[(alignednontemporalstore (v8f64 VR512:$src),
|
|
||||||
addr:$dst)],
|
|
||||||
IIC_SSE_MOVNT>,
|
|
||||||
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
|
|
||||||
|
|
||||||
def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
|
|
||||||
(ins i512mem:$dst, VR512:$src),
|
|
||||||
"vmovntdq\t{$src, $dst|$dst, $src}",
|
|
||||||
[(alignednontemporalstore (v8i64 VR512:$src),
|
|
||||||
addr:$dst)],
|
|
||||||
IIC_SSE_MOVNT>,
|
|
||||||
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
|
||||||
|
ValueType OpVT, RegisterClass RC, X86MemOperand memop,
|
||||||
|
Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
|
||||||
|
let SchedRW = [WriteStore], mayStore = 1,
|
||||||
|
AddedComplexity = 400 in
|
||||||
|
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
|
||||||
|
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||||
|
[(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
|
||||||
|
string elty, string elsz, string vsz512,
|
||||||
|
string vsz256, string vsz128, Domain d,
|
||||||
|
Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
|
||||||
|
let Predicates = [prd] in
|
||||||
|
defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
|
||||||
|
!cast<ValueType>("v"##vsz512##elty##elsz), VR512,
|
||||||
|
!cast<X86MemOperand>(elty##"512mem"), d, itin>,
|
||||||
|
EVEX_V512;
|
||||||
|
|
||||||
|
let Predicates = [prd, HasVLX] in {
|
||||||
|
defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
|
||||||
|
!cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
|
||||||
|
!cast<X86MemOperand>(elty##"256mem"), d, itin>,
|
||||||
|
EVEX_V256;
|
||||||
|
|
||||||
|
defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
|
||||||
|
!cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
|
||||||
|
!cast<X86MemOperand>(elty##"128mem"), d, itin>,
|
||||||
|
EVEX_V128;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
|
||||||
|
"i", "64", "8", "4", "2", SSEPackedInt,
|
||||||
|
HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
|
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
|
||||||
|
"f", "64", "8", "4", "2", SSEPackedDouble,
|
||||||
|
HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
|
||||||
|
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
|
||||||
|
"f", "32", "16", "8", "4", SSEPackedSingle,
|
||||||
|
HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// AVX-512 - Integer arithmetic
|
// AVX-512 - Integer arithmetic
|
||||||
//
|
//
|
||||||
|
|
|
@ -727,6 +727,7 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">;
|
||||||
def HasBWI : Predicate<"Subtarget->hasBWI()">;
|
def HasBWI : Predicate<"Subtarget->hasBWI()">;
|
||||||
def HasVLX : Predicate<"Subtarget->hasVLX()">,
|
def HasVLX : Predicate<"Subtarget->hasVLX()">,
|
||||||
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
|
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
|
||||||
|
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
|
||||||
|
|
||||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||||
|
|
|
@ -3697,6 +3697,7 @@ let Predicates = [UseSSE1] in {
|
||||||
|
|
||||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||||
let SchedRW = [WriteStore] in {
|
let SchedRW = [WriteStore] in {
|
||||||
|
let Predicates = [HasAVX, NoVLX] in {
|
||||||
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f128mem:$dst, VR128:$src),
|
(ins f128mem:$dst, VR128:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
|
@ -3737,6 +3738,7 @@ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||||
[(alignednontemporalstore (v4i64 VR256:$src),
|
[(alignednontemporalstore (v4i64 VR256:$src),
|
||||||
addr:$dst)],
|
addr:$dst)],
|
||||||
IIC_SSE_MOVNT>, VEX, VEX_L;
|
IIC_SSE_MOVNT>, VEX, VEX_L;
|
||||||
|
}
|
||||||
|
|
||||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
|
||||||
|
|
||||||
|
define void @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE) {
|
||||||
|
; CHECK: vmovntps %ymm{{.*}} ## encoding: [0x62
|
||||||
|
%cast = bitcast i8* %B to <8 x float>*
|
||||||
|
%A2 = fadd <8 x float> %A, %AA
|
||||||
|
store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0
|
||||||
|
; CHECK: vmovntdq %ymm{{.*}} ## encoding: [0x62
|
||||||
|
%cast1 = bitcast i8* %B to <4 x i64>*
|
||||||
|
%E2 = add <4 x i64> %E, %EE
|
||||||
|
store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0
|
||||||
|
; CHECK: vmovntpd %ymm{{.*}} ## encoding: [0x62
|
||||||
|
%cast2 = bitcast i8* %B to <4 x double>*
|
||||||
|
%C2 = fadd <4 x double> %C, %CC
|
||||||
|
store <4 x double> %C2, <4 x double>* %cast2, align 64, !nontemporal !0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE) {
|
||||||
|
; CHECK: vmovntps %xmm{{.*}} ## encoding: [0x62
|
||||||
|
%cast = bitcast i8* %B to <4 x float>*
|
||||||
|
%A2 = fadd <4 x float> %A, %AA
|
||||||
|
store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0
|
||||||
|
; CHECK: vmovntdq %xmm{{.*}} ## encoding: [0x62
|
||||||
|
%cast1 = bitcast i8* %B to <2 x i64>*
|
||||||
|
%E2 = add <2 x i64> %E, %EE
|
||||||
|
store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0
|
||||||
|
; CHECK: vmovntpd %xmm{{.*}} ## encoding: [0x62
|
||||||
|
%cast2 = bitcast i8* %B to <2 x double>*
|
||||||
|
%C2 = fadd <2 x double> %C, %CC
|
||||||
|
store <2 x double> %C2, <2 x double>* %cast2, align 64, !nontemporal !0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
!0 = metadata !{i32 1}
|
|
@ -665,6 +665,102 @@
|
||||||
// CHECK: encoding: [0x62,0xf1,0xfe,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
|
// CHECK: encoding: [0x62,0xf1,0xfe,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
|
||||||
vmovdqu64 -8256(%rdx), %zmm6
|
vmovdqu64 -8256(%rdx), %zmm6
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %zmm24, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x01]
|
||||||
|
vmovntdq %zmm24, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %zmm24, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x84,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntdq %zmm24, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %zmm24, 8128(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x7f]
|
||||||
|
vmovntdq %zmm24, 8128(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %zmm24, 8192(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0x00,0x20,0x00,0x00]
|
||||||
|
vmovntdq %zmm24, 8192(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %zmm24, -8192(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x80]
|
||||||
|
vmovntdq %zmm24, -8192(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %zmm24, -8256(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0xc0,0xdf,0xff,0xff]
|
||||||
|
vmovntdq %zmm24, -8256(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa (%rcx), %zmm17
|
||||||
|
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x09]
|
||||||
|
vmovntdqa (%rcx), %zmm17
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 291(%rax,%r14,8), %zmm17
|
||||||
|
// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x2a,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntdqa 291(%rax,%r14,8), %zmm17
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 8128(%rdx), %zmm17
|
||||||
|
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x7f]
|
||||||
|
vmovntdqa 8128(%rdx), %zmm17
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 8192(%rdx), %zmm17
|
||||||
|
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0x00,0x20,0x00,0x00]
|
||||||
|
vmovntdqa 8192(%rdx), %zmm17
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa -8192(%rdx), %zmm17
|
||||||
|
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x80]
|
||||||
|
vmovntdqa -8192(%rdx), %zmm17
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa -8256(%rdx), %zmm17
|
||||||
|
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0xc0,0xdf,0xff,0xff]
|
||||||
|
vmovntdqa -8256(%rdx), %zmm17
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %zmm17, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x09]
|
||||||
|
vmovntpd %zmm17, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %zmm17, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0xa1,0xfd,0x48,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntpd %zmm17, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %zmm17, 8128(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x7f]
|
||||||
|
vmovntpd %zmm17, 8128(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %zmm17, 8192(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0x00,0x20,0x00,0x00]
|
||||||
|
vmovntpd %zmm17, 8192(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %zmm17, -8192(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x80]
|
||||||
|
vmovntpd %zmm17, -8192(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %zmm17, -8256(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0xc0,0xdf,0xff,0xff]
|
||||||
|
vmovntpd %zmm17, -8256(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %zmm5, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x29]
|
||||||
|
vmovntps %zmm5, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %zmm5, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0xb1,0x7c,0x48,0x2b,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntps %zmm5, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %zmm5, 8128(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x7f]
|
||||||
|
vmovntps %zmm5, 8128(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %zmm5, 8192(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0x00,0x20,0x00,0x00]
|
||||||
|
vmovntps %zmm5, 8192(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %zmm5, -8192(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x80]
|
||||||
|
vmovntps %zmm5, -8192(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %zmm5, -8256(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0xc0,0xdf,0xff,0xff]
|
||||||
|
vmovntps %zmm5, -8256(%rdx)
|
||||||
|
|
||||||
// CHECK: vmovupd %zmm9, %zmm27
|
// CHECK: vmovupd %zmm9, %zmm27
|
||||||
// CHECK: encoding: [0x62,0x41,0xfd,0x48,0x10,0xd9]
|
// CHECK: encoding: [0x62,0x41,0xfd,0x48,0x10,0xd9]
|
||||||
vmovupd %zmm9, %zmm27
|
vmovupd %zmm9, %zmm27
|
||||||
|
|
|
@ -432,6 +432,198 @@
|
||||||
// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x6f,0xaa,0xe0,0xef,0xff,0xff]
|
// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x6f,0xaa,0xe0,0xef,0xff,0xff]
|
||||||
vmovdqu64 -4128(%rdx), %ymm29
|
vmovdqu64 -4128(%rdx), %ymm29
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %xmm22, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x31]
|
||||||
|
vmovntdq %xmm22, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %xmm22, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xe7,0xb4,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntdq %xmm22, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %xmm22, 2032(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x7f]
|
||||||
|
vmovntdq %xmm22, 2032(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %xmm22, 2048(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0x00,0x08,0x00,0x00]
|
||||||
|
vmovntdq %xmm22, 2048(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %xmm22, -2048(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x80]
|
||||||
|
vmovntdq %xmm22, -2048(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %xmm22, -2064(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0xf0,0xf7,0xff,0xff]
|
||||||
|
vmovntdq %xmm22, -2064(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %ymm19, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x19]
|
||||||
|
vmovntdq %ymm19, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %ymm19, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0xe7,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntdq %ymm19, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %ymm19, 4064(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x7f]
|
||||||
|
vmovntdq %ymm19, 4064(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %ymm19, 4096(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0x00,0x10,0x00,0x00]
|
||||||
|
vmovntdq %ymm19, 4096(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %ymm19, -4096(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x80]
|
||||||
|
vmovntdq %ymm19, -4096(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdq %ymm19, -4128(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0xe0,0xef,0xff,0xff]
|
||||||
|
vmovntdq %ymm19, -4128(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa (%rcx), %xmm24
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x01]
|
||||||
|
vmovntdqa (%rcx), %xmm24
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 291(%rax,%r14,8), %xmm24
|
||||||
|
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x2a,0x84,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntdqa 291(%rax,%r14,8), %xmm24
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 2032(%rdx), %xmm24
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x7f]
|
||||||
|
vmovntdqa 2032(%rdx), %xmm24
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 2048(%rdx), %xmm24
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0x00,0x08,0x00,0x00]
|
||||||
|
vmovntdqa 2048(%rdx), %xmm24
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa -2048(%rdx), %xmm24
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x80]
|
||||||
|
vmovntdqa -2048(%rdx), %xmm24
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa -2064(%rdx), %xmm24
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0xf0,0xf7,0xff,0xff]
|
||||||
|
vmovntdqa -2064(%rdx), %xmm24
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa (%rcx), %ymm28
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x21]
|
||||||
|
vmovntdqa (%rcx), %ymm28
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 291(%rax,%r14,8), %ymm28
|
||||||
|
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x2a,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntdqa 291(%rax,%r14,8), %ymm28
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 4064(%rdx), %ymm28
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x7f]
|
||||||
|
vmovntdqa 4064(%rdx), %ymm28
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa 4096(%rdx), %ymm28
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0x00,0x10,0x00,0x00]
|
||||||
|
vmovntdqa 4096(%rdx), %ymm28
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa -4096(%rdx), %ymm28
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x80]
|
||||||
|
vmovntdqa -4096(%rdx), %ymm28
|
||||||
|
|
||||||
|
// CHECK: vmovntdqa -4128(%rdx), %ymm28
|
||||||
|
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0xe0,0xef,0xff,0xff]
|
||||||
|
vmovntdqa -4128(%rdx), %ymm28
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %xmm17, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x09]
|
||||||
|
vmovntpd %xmm17, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %xmm17, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntpd %xmm17, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %xmm17, 2032(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x7f]
|
||||||
|
vmovntpd %xmm17, 2032(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %xmm17, 2048(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0x00,0x08,0x00,0x00]
|
||||||
|
vmovntpd %xmm17, 2048(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %xmm17, -2048(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x80]
|
||||||
|
vmovntpd %xmm17, -2048(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %xmm17, -2064(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0xf0,0xf7,0xff,0xff]
|
||||||
|
vmovntpd %xmm17, -2064(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %ymm27, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x19]
|
||||||
|
vmovntpd %ymm27, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %ymm27, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x2b,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntpd %ymm27, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %ymm27, 4064(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x7f]
|
||||||
|
vmovntpd %ymm27, 4064(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %ymm27, 4096(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0x00,0x10,0x00,0x00]
|
||||||
|
vmovntpd %ymm27, 4096(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %ymm27, -4096(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x80]
|
||||||
|
vmovntpd %ymm27, -4096(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntpd %ymm27, -4128(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0xe0,0xef,0xff,0xff]
|
||||||
|
vmovntpd %ymm27, -4128(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %xmm26, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x11]
|
||||||
|
vmovntps %xmm26, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %xmm26, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x2b,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntps %xmm26, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %xmm26, 2032(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x7f]
|
||||||
|
vmovntps %xmm26, 2032(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %xmm26, 2048(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0x00,0x08,0x00,0x00]
|
||||||
|
vmovntps %xmm26, 2048(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %xmm26, -2048(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x80]
|
||||||
|
vmovntps %xmm26, -2048(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %xmm26, -2064(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0xf0,0xf7,0xff,0xff]
|
||||||
|
vmovntps %xmm26, -2064(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %ymm28, (%rcx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x21]
|
||||||
|
vmovntps %ymm28, (%rcx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %ymm28, 291(%rax,%r14,8)
|
||||||
|
// CHECK: encoding: [0x62,0x21,0x7c,0x28,0x2b,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||||
|
vmovntps %ymm28, 291(%rax,%r14,8)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %ymm28, 4064(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x7f]
|
||||||
|
vmovntps %ymm28, 4064(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %ymm28, 4096(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0x00,0x10,0x00,0x00]
|
||||||
|
vmovntps %ymm28, 4096(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %ymm28, -4096(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x80]
|
||||||
|
vmovntps %ymm28, -4096(%rdx)
|
||||||
|
|
||||||
|
// CHECK: vmovntps %ymm28, -4128(%rdx)
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0xe0,0xef,0xff,0xff]
|
||||||
|
vmovntps %ymm28, -4128(%rdx)
|
||||||
|
|
||||||
// CHECK: vmovupd %xmm22, %xmm24
|
// CHECK: vmovupd %xmm22, %xmm24
|
||||||
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x10,0xc6]
|
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x10,0xc6]
|
||||||
vmovupd %xmm22, %xmm24
|
vmovupd %xmm22, %xmm24
|
||||||
|
|
Loading…
Reference in New Issue