X86-FMA3: Improved/enabled the memory folding optimization for scalar loads
generated for _mm_load_s{s,d}() intrinsics and used in scalar FMAs generated
for the FMA intrinsics _mm_f{madd,msub,nmadd,nmsub}_s{s,d}().

Reviewer: David Kreitzer
Differential Revision: http://reviews.llvm.org/D14762

llvm-svn: 254140
parent 4c175cdc8e
commit ed865dfcc5
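To make the description above concrete, here is a minimal illustrative snippet (not part of the commit; the function and variable names are invented, and it assumes a compiler with FMA codegen enabled, e.g. -mfma or -mcpu=core-avx2). A scalar value loaded through _mm_load_ss() feeds a scalar FMA intrinsic, and with this folding enabled the load is expected to become the memory operand of the FMA instruction (as the CHECK lines of the new test below expect, e.g. vfmadd213ss (%rdx), ...) rather than requiring a separate vmovss:

#include <immintrin.h>

// Illustrative only: mirrors the fmadd_aab_ss pattern in the new test file.
// Both scalars are loaded with _mm_load_ss; the load of *b is the one that
// can be folded into the FMA's memory operand once the *_Int forms are
// marked as foldable.
void fmadd_aab_ss(float *a, const float *b) {
  __m128 av = _mm_load_ss(a);            // element 0 = *a, upper elements zeroed
  __m128 bv = _mm_load_ss(b);            // element 0 = *b, upper elements zeroed
  __m128 r  = _mm_fmadd_ss(av, av, bv);  // r[0] = (*a)*(*a) + (*b); upper elements from av
  _mm_store_ss(a, r);                    // write only the low element back
}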
@@ -170,7 +170,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
 // Commuting the 1st operand of FMA*_Int requires some additional analysis,
 // the commute optimization is legal only if all users of FMA*_Int use only
 // the lowest element of the FMA*_Int instruction. Even though such analysis
-// may be not implemened yet we allow the routines doing the actual commute
+// may be not implemented yet we allow the routines doing the actual commute
 // transformation to decide if one or another instruction is commutable or not.
 let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
     hasSideEffects = 0 in
@@ -237,20 +237,12 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
   // require the pass-through values to come from the first source
   // operand, not the second.
   def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
-            (COPY_TO_REGCLASS
-              (!cast<Instruction>(NAME#"SSr213r_Int")
-                (COPY_TO_REGCLASS $src1, FR32),
-                (COPY_TO_REGCLASS $src2, FR32),
-                (COPY_TO_REGCLASS $src3, FR32)),
-              VR128)>;
+            (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int")
+             $src1, $src2, $src3), VR128)>;
 
   def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
-            (COPY_TO_REGCLASS
-              (!cast<Instruction>(NAME#"SDr213r_Int")
-                (COPY_TO_REGCLASS $src1, FR64),
-                (COPY_TO_REGCLASS $src2, FR64),
-                (COPY_TO_REGCLASS $src3, FR64)),
-              VR128)>;
+            (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int")
+             $src1, $src2, $src3), VR128)>;
 }
 
 defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
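To make the comments about commuting and pass-through in the hunks above concrete: for the scalar *_Int forms the upper (pass-through) elements of the result come from the first source operand, so commuting the first operand is only safe when every user of the result reads nothing but the lowest element. A small self-contained C++ illustration of that asymmetry (assumes an FMA-capable target; this snippet is not part of the commit):

#include <immintrin.h>

// _mm_fmadd_ss(a, b, c) computes a[0]*b[0] + c[0] in element 0 and passes
// a's upper elements through unchanged. Swapping a and b leaves element 0
// identical but changes the pass-through elements, which is why the commute
// is only legal when all users read element 0 alone.
__m128 commute_changes_upper_lanes(__m128 a, __m128 b, __m128 c) {
  __m128 r1 = _mm_fmadd_ss(a, b, c);  // upper elements come from a
  __m128 r2 = _mm_fmadd_ss(b, a, c);  // same element 0, upper elements from b
  return _mm_sub_ps(r1, r2);          // nonzero in the upper lanes wherever a != b
}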
@@ -5867,6 +5867,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
   case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
   case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
   case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+  case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+  case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+  case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+  case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+  case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+  case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
     return false;
   default:
     return true;
@@ -5882,6 +5888,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
   case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
   case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
   case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+  case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+  case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+  case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+  case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+  case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+  case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
     return false;
   default:
     return true;
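The opcodes added in these two hunks extend the set of users for which folding a partial scalar load remains allowed: vmovss/vmovsd only define the low 32/64 bits of the xmm register, so folding such a load into an instruction that reads the full 128 bits would read more memory than the original code did. A self-contained sketch of that distinction (illustrative C++, not the LLVM source itself):

#include <immintrin.h>

// Scalar user: only element 0 of v is read, so the 4-byte load of *p can be
// folded into the instruction's memory operand (e.g. vaddss with a memory
// source), and the same reasoning now applies to the scalar FMA *_Int forms.
__m128 scalar_user(__m128 x, const float *p) {
  __m128 v = _mm_load_ss(p);   // defines only element 0; upper elements zeroed
  return _mm_add_ss(x, v);
}

// Packed user: all four elements of v are read. Folding the 4-byte load here
// would turn it into a 16-byte memory access with different semantics (and a
// possible fault), so such folds must be rejected, hence the opcode whitelist.
__m128 packed_user(__m128 x, const float *p) {
  __m128 v = _mm_load_ss(p);
  return _mm_add_ps(x, v);
}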
@@ -0,0 +1,383 @@
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s

attributes #0 = { nounwind }

declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define void @fmadd_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmadd_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmadd_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmsub_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmsub_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmsub_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fmsub_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmadd_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmadd_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmadd_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmadd_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmsub_aab_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmsub_aab_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fnmsub_aba_ss(float* %a, float* %b) #0 {
; CHECK-LABEL: fnmsub_aba_ss:
; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132ss (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmadd_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmadd_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmadd_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmsub_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmsub_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmsub_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fmsub_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmadd_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmadd_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmadd_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmadd_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmsub_aab_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmsub_aab_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fnmsub_aba_sd(double* %a, double* %b) #0 {
; CHECK-LABEL: fnmsub_aba_sd:
; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
; CHECK-NEXT: vmovlps %[[XMM]], (%rcx)
; CHECK-NEXT: ret
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}