forked from OSchip/llvm-project
Clean up the patterns for MOVNT*. It is unclear why there were floating-point types on MOVNTPS and MOVNTDQ, and v4i64 was missing entirely.
llvm-svn: 147767
This commit is contained in:
parent
c1f5622ad3
commit
ef7f5bf8c9
|
@ -3234,17 +3234,12 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
|
|||
"movntpd\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v2f64 VR128:$src),
|
||||
addr:$dst)]>, VEX;
|
||||
def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
|
||||
(ins f128mem:$dst, VR128:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v2f64 VR128:$src),
|
||||
addr:$dst)]>, VEX;
|
||||
|
||||
let ExeDomain = SSEPackedInt in
|
||||
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||
(ins f128mem:$dst, VR128:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v4f32 VR128:$src),
|
||||
[(alignednontemporalstore (v2i64 VR128:$src),
|
||||
addr:$dst)]>, VEX;
|
||||
|
||||
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
|
||||
|
@ -3260,16 +3255,11 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
|
|||
"movntpd\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v4f64 VR256:$src),
|
||||
addr:$dst)]>, VEX;
|
||||
def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
|
||||
(ins f256mem:$dst, VR256:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v4f64 VR256:$src),
|
||||
addr:$dst)]>, VEX;
|
||||
let ExeDomain = SSEPackedInt in
|
||||
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||
(ins f256mem:$dst, VR256:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v8f32 VR256:$src),
|
||||
[(alignednontemporalstore (v4i64 VR256:$src),
|
||||
addr:$dst)]>, VEX;
|
||||
}
|
||||
|
||||
|
@ -3288,14 +3278,10 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
|||
"movntpd\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
|
||||
|
||||
def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
|
||||
|
||||
let ExeDomain = SSEPackedInt in
|
||||
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
||||
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
|
||||
|
||||
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
|
||||
(MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
|
||||
|
||||
define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
|
||||
; CHECK: vmovntps
|
||||
%cast = bitcast i8* %B to <8 x float>*
|
||||
%A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
|
||||
store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
|
||||
; CHECK: vmovntdq
|
||||
%cast1 = bitcast i8* %B to <4 x i64>*
|
||||
%E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
|
||||
store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
|
||||
; CHECK: vmovntpd
|
||||
%cast2 = bitcast i8* %B to <4 x double>*
|
||||
%C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
|
||||
store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
|
||||
; CHECK: movnti
|
||||
%cast3 = bitcast i8* %B to i32*
|
||||
store i32 %D, i32* %cast3, align 16, !nontemporal !0
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = metadata !{i32 1}
|
Loading…
Reference in New Issue