forked from OSchip/llvm-project
Handle llvm.fma.* intrinsics. rdar://10914096
llvm-svn: 154439
This commit is contained in:
parent
02ecae9282
commit
d0007f3c83
|
@@ -769,8 +769,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||||
setOperationAction(ISD::FPOW, MVT::f64, Expand);
|
setOperationAction(ISD::FPOW, MVT::f64, Expand);
|
||||||
setOperationAction(ISD::FPOW, MVT::f32, Expand);
|
setOperationAction(ISD::FPOW, MVT::f32, Expand);
|
||||||
|
|
||||||
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
if (!Subtarget->hasVFP4()) {
|
||||||
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
||||||
|
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
||||||
|
}
|
||||||
|
|
||||||
// Various VFP goodness
|
// Various VFP goodness
|
||||||
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
|
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
|
||||||
|
|
|
@@ -4133,6 +4133,14 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
|
||||||
v4f32, fmul_su, fsub_mlx>,
|
v4f32, fmul_su, fsub_mlx>,
|
||||||
Requires<[HasNEON2,FPContractions]>;
|
Requires<[HasNEON2,FPContractions]>;
|
||||||
|
|
||||||
|
// Match @llvm.fma.* intrinsics
|
||||||
|
// fma(a, b, c) computes a*b + c, whereas VFMAfd accumulates into its first
// operand ($src1 += $Vn * $Vm). The addend therefore has to be the THIRD
// fma operand; binding it to the first one selects src1 + Vn*Vm for an
// input that means src1*Vn + Vm.
def : Pat<(fma (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (v2f32 DPR:$src1)),
|
||||||
|
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
|
||||||
|
Requires<[HasNEON, HasVFP4]>;
|
||||||
|
// Quad-register form of the @llvm.fma.* match. fma(a, b, c) is a*b + c and
// VFMAfq accumulates into its first operand ($src1 += $Vn * $Vm), so the
// accumulator must be matched against the third fma operand, not the first.
def : Pat<(fma (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (v4f32 QPR:$src1)),
|
||||||
|
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
|
||||||
|
Requires<[HasNEON, HasVFP4]>;
|
||||||
|
|
||||||
// Vector Subtract Operations.
|
// Vector Subtract Operations.
|
||||||
|
|
||||||
// VSUB : Vector Subtract (integer and floating-point)
|
// VSUB : Vector Subtract (integer and floating-point)
|
||||||
|
|
|
@@ -1080,6 +1080,14 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
||||||
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
||||||
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
|
Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
|
||||||
|
|
||||||
|
// Match @llvm.fma.* intrinsics
|
||||||
|
// fma(a, b, c) computes a*b + c. VFMAD's first input ($Ddin) is the value
// being accumulated into (Dd = Ddin + Dn * Dm), exactly as in the
// fadd_mlx/fmul_su patterns for VFMAS/VFMAD, so $Ddin must be the third fma
// operand and the multiplicands the first two.
def : Pat<(fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin)),
|
||||||
|
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
// Single-precision counterpart: fma(a, b, c) is a*b + c, and VFMAS computes
// Sd = Sdin + Sn * Sm, so the accumulator $Sdin is matched against the third
// fma operand rather than the first.
def : Pat<(fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin)),
|
||||||
|
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
||||||
|
Requires<[HasVFP4]>;
|
||||||
|
|
||||||
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
||||||
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
||||||
IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
|
IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
|
||||||
|
|
|
@@ -0,0 +1,30 @@
|
||||||
|
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp4 | FileCheck %s
|
||||||
|
|
||||||
|
; Scalar single-precision case: calls @llvm.fma.f32 and expects llc (run with
; -mattr=+vfp4, see the RUN line) to emit a vfma.f32 instruction.
; NOTE(review): only the mnemonic is checked; the operand order of the
; selected instruction is not verified by these CHECK lines.
define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_f32
|
||||||
|
; CHECK: vfma.f32
|
||||||
|
%call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
|
||||||
|
ret float %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; Scalar double-precision case: calls @llvm.fma.f64 and expects a vfma.f64
; instruction in the llc output.
; NOTE(review): only the mnemonic is checked, not the operand order.
define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_f64
|
||||||
|
; CHECK: vfma.f64
|
||||||
|
%call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
|
||||||
|
ret double %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; Vector case: calls @llvm.fma.v2f32 on <2 x float> operands and expects a
; vfma.f32 instruction in the llc output.
; NOTE(review): only the mnemonic is checked, and there is no matching
; <4 x float> test in this file.
define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: test_v2f32
|
||||||
|
; CHECK: vfma.f32
|
||||||
|
%0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
|
||||||
|
ret <2 x float> %0
|
||||||
|
}
|
||||||
|
|
||||||
|
declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||||
|
declare double @llvm.fma.f64(double, double, double) nounwind readnone
|
||||||
|
|
||||||
|
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
|
Loading…
Reference in New Issue