forked from OSchip/llvm-project
[AArch64] Some more FP16 FMA pattern matching
After our previous MachineCombiner exercises (rL371321, rL371818, rL371833), we were still missing a few FP16 FMA patterns; this commit adds them. Differential Revision: https://reviews.llvm.org/D67576. llvm-svn: 371960
This commit is contained in:
parent
ca5acf5b5e
commit
b1e1a26e8e
|
@ -3315,20 +3315,37 @@ defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
|
|||
|
||||
// N.b. FMSUB etc have the accumulator at the *end* of (ins), unlike
|
||||
// the NEON variant.
|
||||
|
||||
// Here we first handle "a + (-b)*c", which maps to FMSUB:
|
||||
|
||||
let Predicates = [HasNEON, HasFullFP16] in
|
||||
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
|
||||
(FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
|
||||
|
||||
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
|
||||
(FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
|
||||
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
|
||||
(FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
|
||||
// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
|
||||
// "(-a) + b*(-c)".
|
||||
// Now it's time for "(-a) + (-b)*c"
|
||||
|
||||
let Predicates = [HasNEON, HasFullFP16] in
|
||||
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
|
||||
(FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
|
||||
|
||||
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
|
||||
(FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
|
||||
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
|
||||
(FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
|
||||
// And here "(-a) + b*(-c)"
|
||||
|
||||
let Predicates = [HasNEON, HasFullFP16] in
|
||||
def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
|
||||
(FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
|
||||
|
||||
def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
|
||||
(FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
|
||||
|
|
|
@ -175,8 +175,7 @@ entry:
|
|||
|
||||
define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
|
||||
; CHECK-LABEL: t_vfmsh_lane_f16:
|
||||
; CHECK: fneg h1, h1
|
||||
; CHECK: fmadd h0, h1, h2, h0
|
||||
; CHECK: fmsub h0, h1, h2, h0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = fsub half 0xH8000, %b
|
||||
|
@ -187,9 +186,8 @@ entry:
|
|||
|
||||
define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
|
||||
; CHECK-LABEL: t_vfmsh_laneq_f16:
|
||||
; CHECK: fneg h1, h1
|
||||
; CHECK-NEXT: fmadd h0, h1, h2, h0
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK: fmsub h0, h1, h2, h0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = fsub half 0xH8000, %b
|
||||
%extract = extractelement <8 x half> %c, i32 0
|
||||
|
|
|
@ -10,44 +10,83 @@ entry:
|
|||
}
|
||||
|
||||
define half @fnma16(half %a, half %b, half %c) nounwind readnone ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: fnma16:
|
||||
; CHECK: fnmadd h0, h0, h1, h2
|
||||
entry:
|
||||
%0 = tail call half @llvm.fma.f16(half %a, half %b, half %c)
|
||||
%mul = fmul half %0, -1.000000e+00
|
||||
ret half %mul
|
||||
}
|
||||
|
||||
define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: fms16:
|
||||
; CHECK: fmsub h0, h0, h1, h2
|
||||
entry:
|
||||
%mul = fmul half %b, -1.000000e+00
|
||||
%0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
|
||||
ret half %0
|
||||
}
|
||||
|
||||
define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: fms16_com:
|
||||
|
||||
; FIXME: This should be a fmsub.
|
||||
|
||||
; CHECK: fneg h1, h1
|
||||
; CHECK-NEXT: fmadd h0, h1, h0, h2
|
||||
; CHECK: fmsub h0, h1, h0, h2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%mul = fmul half %b, -1.000000e+00
|
||||
%0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)
|
||||
ret half %0
|
||||
}
|
||||
|
||||
define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: fnms16:
|
||||
; CHECK: fnmsub h0, h0, h1, h2
|
||||
; CHECK: fnmsub h0, h0, h1, h2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%mul = fmul half %c, -1.000000e+00
|
||||
%0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)
|
||||
ret half %0
|
||||
}
|
||||
|
||||
declare half @llvm.fma.f16(half, half, half)
|
||||
define half @test_fmsub(half %a, half %b, half %c) {
|
||||
; CHECK-LABEL: test_fmsub:
|
||||
; CHECK: fmsub h0, h0, h1, h2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%nega = fsub half -0.0, %a
|
||||
%val = call half @llvm.fma.f16(half %nega, half %b, half %c)
|
||||
ret half %val
|
||||
}
|
||||
|
||||
define half @test_fnmadd(half %a, half %b, half %c) {
|
||||
; CHECK-LABEL: test_fnmadd:
|
||||
; CHECK: fnmadd h0, h0, h1, h2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%nega = fsub half -0.0, %a
|
||||
%negc = fsub half -0.0, %c
|
||||
%val = call half @llvm.fma.f16(half %nega, half %b, half %negc)
|
||||
ret half %val
|
||||
}
|
||||
|
||||
define half @test_fmadd(half %a, half %b, half %c) {
|
||||
; CHECK-LABEL: test_fmadd:
|
||||
; CHECK: fmadd h0, h0, h1, h2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%nega = fsub half -0.0, %a
|
||||
%negb = fsub half -0.0, %b
|
||||
%val = call half @llvm.fma.f16(half %nega, half %negb, half %c)
|
||||
ret half %val
|
||||
}
|
||||
|
||||
define half @test_fnmsub(half %a, half %b, half %c) {
|
||||
; CHECK-LABEL: test_fnmsub:
|
||||
; CHECK: fnmsub h0, h0, h1, h2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%negc = fsub half -0.0, %c
|
||||
%val = call half @llvm.fma.f16(half %a, half %b, half %negc)
|
||||
ret half %val
|
||||
}
|
||||
|
||||
declare half @llvm.fma.f16(half, half, half)
|
||||
|
|
Loading…
Reference in New Issue