[AVX-512] Add masked scalar FMA intrinsics to isNonFoldablePartialRegisterLoad to improve folding of scalar loads.

llvm-svn: 294151
Craig Topper 2017-02-05 22:25:40 +00:00
parent 53008a1e36
commit 59af67206d
2 changed files with 28 additions and 8 deletions
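
For context, a minimal C++ sketch of the pattern this change targets (an illustration only, not part of the commit; the function name is hypothetical, and an AVX-512F-capable target such as -mavx512f is assumed): a masked scalar FMA whose scalar operand is loaded from memory. Before this change the backend kept the vmovss load separate from a register-register vfmadd213ss; with the masked *_Intk/*_Intkz opcodes whitelisted below, the load folds into a single vfmadd132ss with a memory operand, as the updated tests show.

#include <immintrin.h>

// Hypothetical example, not from the commit. The scalar load writes only
// the low 32 bits of the XMM register (a partial-register load), which is
// exactly what isNonFoldablePartialRegisterLoad screens for.
__m128 masked_fma_from_mem(__m128 acc, __m128 mul, const float *p,
                           __mmask8 k) {
  __m128 b = _mm_load_ss(p);                // xmm = mem[0],zero,zero,zero
  return _mm_mask_fmadd_ss(acc, k, mul, b); // low lane: k ? acc*mul + b : acc
}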


@@ -7740,6 +7740,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
     case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
     case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
+    case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
+    case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
+    case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
+    case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
+    case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
+    case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
+    case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
+    case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
+    case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
+    case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
+    case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
+    case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
       return false;
     default:
       return true;
@@ -7773,6 +7785,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
     case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
     case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
+    case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
+    case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
+    case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
+    case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
+    case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
+    case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
+    case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
+    case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
+    case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
+    case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
+    case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
+    case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
       return false;
     default:
       return true;

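For readers unfamiliar with the hook being extended: isNonFoldablePartialRegisterLoad answers "would folding this narrow scalar load into its user instruction be wrong?". Below is a simplified paraphrase of the gating logic (not the verbatim LLVM source; the real function also takes the load and user MachineInstrs and checks register widths):

// Simplified paraphrase, not the verbatim LLVM source. A MOVSS/MOVSD
// load defines only the low element of the destination register, so
// folding it into a user that reads the full register would be wrong.
// The scalar *_Int FMA forms, including the merge-masked (_Intk) and
// zero-masked (_Intkz) variants added by this commit, read only the low
// element of that operand, so they are whitelisted via "return false"
// ("this is not a non-foldable partial-register load").
static bool isNonFoldablePartialRegisterLoad(unsigned UserOpc) {
  switch (UserOpc) {
  case X86::VFMADD213SSZr_Int:   // unmasked form, already whitelisted
  case X86::VFMADD213SSZr_Intk:  // merge-masked form, added here
  case X86::VFMADD213SSZr_Intkz: // zero-masked form, added here
    return false;                // folding the scalar load is safe
  default:
    return true;                 // conservatively keep the separate load
  }
}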

@@ -4843,10 +4843,9 @@ define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_ss_mask_memfold:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT: andl $1, %edx
 ; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213ss %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1}
 ; CHECK-NEXT: vmovss %xmm0, (%rdi)
 ; CHECK-NEXT: retq
   %a.val = load float, float* %a
@@ -4872,10 +4871,9 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_ss_maskz_memfold:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-NEXT: andl $1, %edx
 ; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213ss %xmm0, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vmovss %xmm0, (%rdi)
 ; CHECK-NEXT: retq
   %a.val = load float, float* %a
@@ -4901,10 +4899,9 @@ define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_sd_mask_memfold:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; CHECK-NEXT: andl $1, %edx
 ; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213sd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1}
 ; CHECK-NEXT: vmovlps %xmm0, (%rdi)
 ; CHECK-NEXT: retq
   %a.val = load double, double* %a
@@ -4926,10 +4923,9 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
 ; CHECK-LABEL: fmadd_sd_maskz_memfold:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; CHECK-NEXT: andl $1, %edx
 ; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213sd %xmm0, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vmovlps %xmm0, (%rdi)
 ; CHECK-NEXT: retq
   %a.val = load double, double* %a