forked from OSchip/llvm-project
[X86][AVX1] Split 256-bit vector non-temporal FastISel loads to keep them non-temporal (PR32744)
Extension to D33728.
llvm-svn: 304798
This commit is contained in:
parent
8cd60a5067
commit
f7113fd270
|
@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
|||
assert(HasAVX);
|
||||
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
|
||||
Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
|
||||
else if (IsNonTemporal && Alignment >= 16)
|
||||
return false; // Force split for X86::VMOVNTDQArm
|
||||
else if (Alignment >= 32)
|
||||
Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
|
||||
else
|
||||
|
@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
|||
assert(HasAVX);
|
||||
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
|
||||
Opc = X86::VMOVNTDQAYrm;
|
||||
else if (IsNonTemporal && Alignment >= 16)
|
||||
return false; // Force split for X86::VMOVNTDQArm
|
||||
else if (Alignment >= 32)
|
||||
Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
|
||||
else
|
||||
|
@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
|||
assert(HasAVX);
|
||||
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
|
||||
Opc = X86::VMOVNTDQAYrm;
|
||||
else if (IsNonTemporal && Alignment >= 16)
|
||||
return false; // Force split for X86::VMOVNTDQArm
|
||||
else if (Alignment >= 32)
|
||||
Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
|
||||
else
|
||||
|
|
|
@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
|
|||
;
|
||||
; AVX1-LABEL: test_load_nt8xfloat:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vmovaps (%rdi), %ymm0
|
||||
; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: # implicit-def: %YMM1
|
||||
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_load_nt8xfloat:
|
||||
|
@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
|
|||
;
|
||||
; AVX1-LABEL: test_load_nt4xdouble:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vmovapd (%rdi), %ymm0
|
||||
; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: # implicit-def: %YMM1
|
||||
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_load_nt4xdouble:
|
||||
|
@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
|
|||
;
|
||||
; AVX1-LABEL: test_load_nt32xi8:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: # implicit-def: %YMM1
|
||||
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_load_nt32xi8:
|
||||
|
@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
|
|||
;
|
||||
; AVX1-LABEL: test_load_nt16xi16:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: # implicit-def: %YMM1
|
||||
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_load_nt16xi16:
|
||||
|
@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
|
|||
;
|
||||
; AVX1-LABEL: test_load_nt8xi32:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: # implicit-def: %YMM1
|
||||
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_load_nt8xi32:
|
||||
|
@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
|
|||
;
|
||||
; AVX1-LABEL: test_load_nt4xi64:
|
||||
; AVX1: # BB#0: # %entry
|
||||
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
|
||||
; AVX1-NEXT: # implicit-def: %YMM1
|
||||
; AVX1-NEXT: vmovaps %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_load_nt4xi64:
|
||||
|
|
Loading…
Reference in New Issue