; llvm-project/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
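; Check that fp16 fused multiply-accumulate instructions (vfma/vfms/vfnma/vfnms)
; are generated, and that separate fmul/fadd/fsub are not fused when the
; slowfpvfmx feature is enabled.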
; fusedMACTest2: (f1 * f2) + f3 written as separate fmul and fadd.
; The default run expects the pair to be contracted into one vfma.f16; the
; slowfpvfmx run expects it to stay as vmul.f16 followed by vadd.f16.
; The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vadd.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
%2 = fadd half %1, %f3
store half %2, half *%a1, align 2
ret void
}
; fusedMACTest4: f1 - (f2 * f3) written as separate fmul and fsub.
; The default run expects a single vfms.f16; the slowfpvfmx run expects
; vmul.f16 followed by vsub.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r2]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r0]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest4:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r2]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vsub.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f2, %f3
%2 = fsub half %f1, %1
store half %2, half *%a1, align 2
ret void
}
; fusedMACTest6: -(f1 * f2) - f3 written as fmul, negation and fsub.
; The default run expects a single vfnma.f16; the slowfpvfmx run expects
; vnmul.f16 followed by vsub.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest6:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vnmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
; -0.0 - x negates the product.
%2 = fsub half -0.0, %1
%3 = fsub half %2, %f3
store half %3, half *%a1, align 2
ret void
}
; fusedMACTest8: (f1 * f2) - f3 written as separate fmul and fsub.
; The default run expects a single vfnms.f16; the slowfpvfmx run expects
; vmul.f16 followed by vsub.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest8:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
%2 = fsub half %1, %f3
store half %2, half *%a1, align 2
ret void
}
; test_fma_f16: explicit llvm.fma.f16(a, b, c) intrinsic call.
; Both runs expect vfma.f16, i.e. the intrinsic is selected as the fused
; instruction even when slowfpvfmx is set. The result is stored through %aa.
define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fma_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_f16:
; DONT-FUSE: @ %bb.0: @ %entry
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
entry:
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%c = load half, half *%cc, align 2
%tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
store half %tmp1, half *%aa, align 2
ret void
}
; test_fnms_f16: llvm.fma.f16(a, b, -c), i.e. (a * b) - c.
; The negated addend %tmp2 is passed to the intrinsic so that the test
; exercises vfnms.f16 as its name says; previously %tmp2 was dead and the fma
; consumed %c directly, which made this a duplicate of the vfma.f16 test.
; Both runs expect vfnms.f16 because the fused operation is requested
; explicitly through the intrinsic. The result is stored through %aa.
; NOTE(review): the expectations below mirror @fnms1, which has the same IR
; shape; regenerate with utils/update_llc_test_checks.py to confirm.
define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fnms_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fnms_f16:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%c = load half, half *%cc, align 2
; -0.0 - c negates the addend.
%tmp2 = fsub half -0.0, %c
%tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
store half %tmp3, half *%aa, align 2
ret void
}
; test_fma_const_fold: llvm.fma.f16(a, 1.0, b).
; Both runs expect a plain vadd.f16 and no multiply, i.e. the multiplication
; by 1.0 is folded away. The result is stored through %aa.
define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vadd.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_const_fold:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vadd.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%ret = call half @llvm.fma.f16(half %a, half 1.0, half %b)
store half %ret, half *%aa, align 2
ret void
}
; test_fma_canonicalize: llvm.fma.f16(2.0, a, b) with the constant as the
; first multiplicand in the IR.
; Both runs expect the constant to be materialised with vmov.f16 and used as
; the last operand of vfma.f16. The result is stored through %aa.
define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vmov.f16 s4, #2.000000e+00
; CHECK-NEXT: vfma.f16 s2, s0, s4
; CHECK-NEXT: vstr.16 s2, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_canonicalize:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmov.f16 s4, #2.000000e+00
; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
; DONT-FUSE-NEXT: vstr.16 s2, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%ret = call half @llvm.fma.f16(half 2.0, half %a, half %b)
store half %ret, half *%aa, align 2
ret void
}
; fms1: llvm.fma.f16(-f1, f2, f3), negation on the first multiplicand.
; Both runs expect the negation to be absorbed into a single vfms.f16.
; The result is stored back through %a1.
define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; -0.0 - f1 negates the first multiplicand.
%s = fsub half -0.0, %f1
%ret = call half @llvm.fma.f16(half %s, half %f2, half %f3)
store half %ret, half *%a1, align 2
ret void
}
; fms2: llvm.fma.f16(f2, -f1, f3), negation on the second multiplicand.
; Both runs expect the same single vfms.f16 as when the negated value is the
; first multiplicand. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; -0.0 - f1 negates f1, which is then passed as the second multiplicand.
%s = fsub half -0.0, %f1
%ret = call half @llvm.fma.f16(half %f2, half %s, half %f3)
store half %ret, half *%a1, align 2
ret void
}
; fnma1: -(llvm.fma.f16(f1, f2, f3)), negation applied to the fma result.
; Both runs expect a single vfnma.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3)
; -0.0 - fma negates the fused result.
%n1 = fsub half -0.0, %fma
store half %n1, half *%a1, align 2
ret void
}
; fnma2: llvm.fma.f16(-f1, f2, -f3), negation on one multiplicand and on the
; addend.
; Both runs expect a single vfnma.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; Negate the first multiplicand and the addend (-0.0 - x).
%n1 = fsub half -0.0, %f1
%n3 = fsub half -0.0, %f3
%ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3)
store half %ret, half *%a1, align 2
ret void
}
; fnms1: llvm.fma.f16(f1, f2, -f3), negation on the addend only.
; Both runs expect a single vfnms.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; -0.0 - f3 negates the addend.
%n3 = fsub half -0.0, %f3
%ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3)
store half %ret, half *%a1, align 2
ret void
}
; fnms2: -(llvm.fma.f16(-f1, f2, f3)), negation on the first multiplicand and
; on the fma result.
; Both runs expect a single vfnms.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; -0.0 - f1 negates the first multiplicand.
%n1 = fsub half -0.0, %f1
%fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3)
; -0.0 - fma negates the fused result.
%n = fsub half -0.0, %fma
store half %n, half *%a1, align 2
ret void
}
; fnms3: -(llvm.fma.f16(f1, -f2, f3)), negation on the second multiplicand
; and on the fma result.
; Both runs expect a single vfnms.f16; compared with @fnms2 the expected loads
; place [r0] in s0 and [r1] in s2. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; -0.0 - f2 negates the second multiplicand.
%n2 = fsub half -0.0, %f2
%fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3)
; -0.0 - fma negates the fused result.
%n1 = fsub half -0.0, %fma
store half %n1, half *%a1, align 2
ret void
}
; Half-precision fused multiply-add intrinsic called by the tests in this file.
declare half @llvm.fma.f16(half, half, half) nounwind readnone