[ARM] FP16: support VFMA

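This addresses PR38404: add instruction selection patterns so that the
v4f16 and v8f16 forms of @llvm.fma select VFMA.F16, and enable the
corresponding vfma/vfms regression tests.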

llvm-svn: 338830
This commit is contained in:
Sjoerd Meijer 2018-08-03 09:12:56 +00:00
parent 7a3bd723b4
commit 9b30213828
2 changed files with 46 additions and 26 deletions

View File

@ -4734,6 +4734,12 @@ def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
//
// Each pattern reorders the operands: the third operand of the fma node
// ($src1) becomes the first operand of the selected instruction, followed by
// $Vn and $Vm in their original order.
// NOTE(review): $src1 is presumably the accumulator tied to the destination
// register - confirm against the instruction class definition.
//
// v4f16 (D register) fma -> VFMAhd. The predicate list is NEON + full FP16;
// unlike the VFMShq instruction definition above it does not list
// UseFusedMAC.
def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasNEON,HasFullFP16]>;
// v8f16 (Q register) fma -> VFMAhq. The third fma operand ($src1) is moved to
// the front of the instruction operand list; $Vn and $Vm keep their order.
// Gated on NEON + full FP16.
def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
(VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
Requires<[HasNEON,HasFullFP16]>;
// v2f32 (D register) fma -> VFMAfd, with the same operand reordering: the
// third fma operand ($src1) comes first in the instruction operand list.
// This single-precision form is gated on HasVFP4 rather than on the
// NEON/FP16 predicates used by the half-precision patterns.
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;

View File

@ -911,34 +911,48 @@ entry:
ret <8 x half> %sub.i
}
; Fused multiply-add on <4 x half>: computes fma(%b, %c, %a), i.e. %a is the
; addend. The directives below expect exactly one vfma.f16 on D registers,
; immediately followed by the return.
define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vfma_f16:
; CHECK: vfma.f16 d0, d1, d2
; CHECK-NEXT: bx lr
entry:
; The addend %a is passed last to the intrinsic but is expected first (d0) in
; the emitted instruction.
%0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
ret <4 x half> %0
}
; Fused multiply-add on <8 x half>: computes fma(%b, %c, %a), i.e. %a is the
; addend. The directives below expect exactly one vfma.f16 on Q registers,
; immediately followed by the return.
define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmaq_f16:
; CHECK: vfma.f16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
; The addend %a is passed last to the intrinsic but is expected first (q0) in
; the emitted instruction.
%0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
ret <8 x half> %0
}
; Multiply-subtract on <4 x half>, written as fma(-%b, %c, %a). The negation is
; expressed as an fsub from a vector whose lanes are all 0xH8000 (half -0.0).
; The directives below expect a separate vneg.f16 into some D register,
; captured as D16, which then feeds vfma.f16 as the first multiplicand.
; NOTE(review): the test name suggests vfms, yet the expected sequence is
; vneg + vfma rather than a single vfms.f16 - confirm whether a dedicated
; selection pattern for the negated form is planned.
define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vfms_f16:
; CHECK: vneg.f16 [[D16:d[0-9]+]], d1
; CHECK-NEXT: vfma.f16 d0, [[D16]], d2
; CHECK-NEXT: bx lr
entry:
; -0.0 - %b, i.e. the negation of %b.
%sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
%0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
ret <4 x half> %0
}
; Multiply-subtract on <8 x half>, written as fma(-%b, %c, %a). The negation is
; expressed as an fsub from a vector whose lanes are all 0xH8000 (half -0.0).
; The directives below expect a separate vneg.f16 into some Q register,
; captured as Q8, which then feeds vfma.f16 as the first multiplicand.
; NOTE(review): the test name suggests vfms, yet the expected sequence is
; vneg + vfma rather than a single vfms.f16 - confirm whether a dedicated
; selection pattern for the negated form is planned.
define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmsq_f16:
; CHECK: vneg.f16 [[Q8:q[0-9]+]], q1
; CHECK-NEXT: vfma.f16 q0, [[Q8]], q2
; CHECK-NEXT: bx lr
entry:
; -0.0 - %b, i.e. the negation of %b.
%sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
%0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
ret <8 x half> %0
}
; FIXME (PR38404)
;
;define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
;entry:
; %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
; ret <4 x half> %0
;}
;define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
;entry:
; %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
; ret <8 x half> %0
;}
;define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
;entry:
; %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
; %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
; ret <4 x half> %0
;}
;define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
;entry:
; %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
; %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
; ret <8 x half> %0
;}
;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>