[libc] Add hardware implementations of fma and fmaf for x86_64 and aarch64.

The current generic implementation of the fmaf function has been moved to the FPUtil directory. This allows one to use the fma operation from implementations of other math functions like the trigonometric functions without depending on/requiring the fma/fmaf/fmal function targets. If this pattern ends up being convenient, we will switch all generic math implementations to this pattern. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D100811
2021-04-20 04:06:25 +00:00 · 2021-04-20 04:06:25 +00:00 · 95934c3a37
parent 05eeed9691
commit 95934c3a37
14 changed files with 281 additions and 20 deletions
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@ -77,6 +77,7 @@ set(TARGET_LIBM_ENTRYPOINTS
    libc.src.math.floor
    libc.src.math.floorf
    libc.src.math.floorl
+    libc.src.math.fma
    libc.src.math.fmaf
    libc.src.math.fmax
    libc.src.math.fmaxf
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@ -77,6 +77,7 @@ set(TARGET_LIBM_ENTRYPOINTS
    libc.src.math.floor
    libc.src.math.floorf
    libc.src.math.floorl
+    libc.src.math.fma
    libc.src.math.fmaf
    libc.src.math.fmin
    libc.src.math.fminf
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@ -32,6 +32,30 @@ function(add_math_entrypoint_object name)
  message(FATAL_ERROR "No machine specific or generic implementation found for ${name}.")
 endfunction()

+# The fmaf entrypoint. Its source, fmaf.cpp in this directory, forwards to
+# fputil::fma (see utils/FPUtil/FMA.h).
+add_entrypoint_object(
+  fmaf
+  SRCS
+    fmaf.cpp
+  HDRS
+    fmaf.h
+  DEPENDS
+    libc.utils.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O2
+)
+
+# The fma (double precision) entrypoint. Its source, fma.cpp in this directory,
+# forwards to fputil::fma. Note that utils/FPUtil/FMA.h only provides a double
+# overload of fputil::fma for x86_64 and aarch64; its fallback branch is
+# restricted to float.
+add_entrypoint_object(
+  fma
+  SRCS
+    fma.cpp
+  HDRS
+    fma.h
+  DEPENDS
+    libc.utils.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O2
+)
+
 add_math_entrypoint_object(ceil)
 add_math_entrypoint_object(ceilf)
 add_math_entrypoint_object(ceill)
@ -58,8 +82,6 @@ add_math_entrypoint_object(floor)
 add_math_entrypoint_object(floorf)
 add_math_entrypoint_object(floorl)

-add_math_entrypoint_object(fmaf)
-
 add_math_entrypoint_object(fmax)
 add_math_entrypoint_object(fmaxf)
 add_math_entrypoint_object(fmaxl)
--- a/libc/src/math/fma.cpp
+++ b/libc/src/math/fma.cpp
@ -0,0 +1,20 @@
+//===-- Implementation of fma function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fma.h"
+#include "src/__support/common.h"
+
+#include "utils/FPUtil/FMA.h"
+
+namespace __llvm_libc {
+
+// Double precision fused multiply-add entrypoint. All of the work is done by
+// fputil::fma from utils/FPUtil/FMA.h, which selects an implementation based
+// on the target architecture.
+LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
+  return fputil::fma(x, y, z);
+}
+
+} // namespace __llvm_libc
--- a/libc/src/math/fma.h
+++ b/libc/src/math/fma.h
@ -0,0 +1,18 @@
+//===-- Implementation header for fma ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMA_H
+#define LLVM_LIBC_SRC_MATH_FMA_H
+
+namespace __llvm_libc {
+
+// Double precision fused multiply-add. The definition in fma.cpp forwards its
+// three arguments to fputil::fma.
+double fma(double x, double y, double z);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_FMA_H
--- a/libc/src/math/fmaf.cpp
+++ b/libc/src/math/fmaf.cpp
@ -0,0 +1,20 @@
+//===-- Implementation of fmaf function -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmaf.h"
+#include "src/__support/common.h"
+
+#include "utils/FPUtil/FMA.h"
+
+namespace __llvm_libc {
+
+// Single precision fused multiply-add entrypoint. All of the work is done by
+// fputil::fma from utils/FPUtil/FMA.h, which selects an implementation based
+// on the target architecture.
+LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
+  return fputil::fma(x, y, z);
+}
+
+} // namespace __llvm_libc
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@ -78,6 +78,9 @@ add_entrypoint_object(
    .sincosf_utils
    libc.include.math
    libc.src.errno.__errno_location
+    libc.utils.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O3
 )

 add_entrypoint_object(
@ -977,15 +980,3 @@ add_entrypoint_object(
  COMPILE_OPTIONS
    -O2
 )
-
-add_entrypoint_object(
-  fmaf
-  SRCS
-    fmaf.cpp
-  HDRS
-    ../fmaf.h
-  DEPENDS
-    libc.utils.FPUtil.fputil
-  COMPILE_OPTIONS
-    -O2
-)
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@ -1119,5 +1119,18 @@ add_fp_unittest(
    libc.utils.FPUtil.fputil
 )

+# Unit test for the double precision fma entrypoint (fma_test.cpp).
+# NOTE(review): NEED_MPFR is presumably set because the shared FmaTest.h
+# fixture compares against MPFR results - confirm against FmaTest.h.
+add_fp_unittest(
+  fma_test
+  NEED_MPFR
+  SUITE
+    libc_math_unittests
+  SRCS
+    fma_test.cpp
+  DEPENDS
+    libc.include.math
+    libc.src.math.fma
+    libc.utils.FPUtil.fputil
+)
+
 add_subdirectory(generic)
 add_subdirectory(exhaustive)
--- a/libc/test/src/math/fma_test.cpp
+++ b/libc/test/src/math/fma_test.cpp
@ -0,0 +1,23 @@
+//===-- Unittests for fma ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/fma.h"
+
+// Instantiate the FmaTestTemplate fixture (presumably declared in FmaTest.h)
+// for double and run each of its checks against __llvm_libc::fma.
+using LlvmLibcFmaTest = FmaTestTemplate<double>;
+
+TEST_F(LlvmLibcFmaTest, SpecialNumbers) {
+  testSpecialNumbers(&__llvm_libc::fma);
+}
+
+TEST_F(LlvmLibcFmaTest, SubnormalRange) {
+  testSubnormalRange(&__llvm_libc::fma);
+}
+
+TEST_F(LlvmLibcFmaTest, NormalRange) { testNormalRange(&__llvm_libc::fma); }
--- a/libc/utils/FPUtil/FMA.h
+++ b/libc/utils/FPUtil/FMA.h
@ -0,0 +1,37 @@
+//===-- Common header for FMA implementations -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_FPUTIL_FMA_H
+#define LLVM_LIBC_UTILS_FPUTIL_FMA_H
+
+#include "utils/CPP/TypeTraits.h"
+
+#ifdef __x86_64__
+#include "x86_64/FMA.h"
+#elif defined(__aarch64__)
+#include "aarch64/FMA.h"
+#else
+#include "generic/FMA.h"
+
+namespace __llvm_libc {
+namespace fputil {
+
+// Fallback used when neither the x86_64 nor the aarch64 header was selected
+// above. We have a generic implementation available only for single precision
+// fma, so we restrict this overload to float values for now; it simply
+// forwards to generic::fma.
+template <typename T>
+static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
+                                                                     T z) {
+  return generic::fma(x, y, z);
+}
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#endif
+
+#endif // LLVM_LIBC_UTILS_FPUTIL_FMA_H
--- a/libc/utils/FPUtil/aarch64/FMA.h
+++ b/libc/utils/FPUtil/aarch64/FMA.h
@ -0,0 +1,38 @@
+//===-- Aarch64 implementations of the fma function -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_FPUTIL_AARCH64_FMA_H
+#define LLVM_LIBC_UTILS_FPUTIL_AARCH64_FMA_H
+
+#include "utils/CPP/TypeTraits.h"
+
+namespace __llvm_libc {
+namespace fputil {
+
+// Single precision fused multiply-add using the AArch64 FMADD instruction.
+// Returns the value the instruction produces for x * y + z.
+//
+// Declared static inline, like the x86_64 and generic fputil::fma overloads,
+// so that this header-only helper has internal linkage.
+template <typename T>
+static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
+                                                                     T z) {
+  float result;
+  // FMADD Sd, Sn, Sm, Sa computes Sn * Sm + Sa. The "w" constraint places each
+  // operand in a floating point register and the %s modifier names its
+  // single precision scalar view.
+  __asm__ __volatile__("fmadd %s0, %s1, %s2, %s3\n\t"
+                       : "=w"(result)
+                       : "w"(x), "w"(y), "w"(z));
+  return result;
+}
+
+// Double precision fused multiply-add using the AArch64 FMADD instruction.
+// Returns the value the instruction produces for x * y + z.
+//
+// Declared static inline, like the x86_64 and generic fputil::fma overloads,
+// so that this header-only helper has internal linkage.
+template <typename T>
+static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T> fma(T x, T y,
+                                                                      T z) {
+  double result;
+  // FMADD Dd, Dn, Dm, Da computes Dn * Dm + Da. The "w" constraint places each
+  // operand in a floating point register and the %d modifier names its
+  // double precision scalar view.
+  __asm__ __volatile__("fmadd %d0, %d1, %d2, %d3\n\t"
+                       : "=w"(result)
+                       : "w"(x), "w"(y), "w"(z));
+  return result;
+}
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_UTILS_FPUTIL_AARCH64_FMA_H
--- a/libc/utils/FPUtil/generic/FMA.h
+++ b/libc/utils/FPUtil/generic/FMA.h
@ -1,4 +1,4 @@
-//===-- Implementation of fmaf function -----------------------------------===//
+//===-- Generic implementations of the fma function -------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@ -6,15 +6,18 @@
 //
 //===----------------------------------------------------------------------===//

-#include "src/math/fmaf.h"
-#include "src/__support/common.h"
+#ifndef LLVM_LIBC_UTILS_FPUTIL_GENERIC_FMA_H
+#define LLVM_LIBC_UTILS_FPUTIL_GENERIC_FMA_H

-#include "utils/FPUtil/FEnv.h"
-#include "utils/FPUtil/FPBits.h"
+#include "utils/CPP/TypeTraits.h"

 namespace __llvm_libc {
+namespace fputil {
+namespace generic {

-LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
+template <typename T>
+static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
+                                                                     T z) {
  // Product is exact.
  double prod = static_cast<double>(x) * static_cast<double>(y);
  double z_d = static_cast<double>(z);
@ -62,4 +65,10 @@ LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
  return static_cast<float>(static_cast<double>(bit_sum));
 }

+} // namespace generic
+} // namespace fputil
 } // namespace __llvm_libc
+
+#endif // Generic fma implementations
+
+#endif // LLVM_LIBC_UTILS_FPUTIL_GENERIC_FMA_H
--- a/libc/utils/FPUtil/generic/README.md
+++ b/libc/utils/FPUtil/generic/README.md
@ -0,0 +1,28 @@
+This directory contains machine independent implementations of floating point
+operations. The implementations are nested in the namespace
+`__llvm_libc::fputil::generic`. This is to facilitate calling these generic
+implementations from machine dependent implementations. Consider the example of
+the fused multiply-add operation (FMA). The C standard library requires three
+different flavors, `fma` which operates on double precision numbers, `fmaf` which
+operates on single precision numbers, and `fmal` which operates on `long double`
+numbers. On Aarch64, there are hardware instructions which implement the single
+and double precision flavors but not the `long double` flavor. For such targets,
+we want to be able to call the generic `long double` implementation from the
+`long double` flavor. By putting the generic implementations in a separate
+nested namespace, we will be able to call them as follows:
+
+```
+namespace __llvm_libc {
+namespace fputil {
+
+long double fmal(long double x, long double y, long double z) {
+  return generic::fmal(x, y, z);
+}
+
+} // namespace fputil
+} // namespace __llvm_libc
+```
+
+Note that actual code might not be as straightforward as above (for example,
+we might want to prevent implicit type promotions by using some template
+facilities). But, the general idea is very similar.
--- a/libc/utils/FPUtil/x86_64/FMA.h
+++ b/libc/utils/FPUtil/x86_64/FMA.h
@ -0,0 +1,40 @@
+//===-- x86_64 implementations of the fma function --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_FPUTIL_X86_64_FMA_H
+#define LLVM_LIBC_UTILS_FPUTIL_X86_64_FMA_H
+
+#include "utils/CPP/TypeTraits.h"
+
+namespace __llvm_libc {
+namespace fputil {
+
+// Single precision fused multiply-add using the x86 VFMADD213SS instruction.
+// Returns the value the instruction produces for x * y + z.
+// NOTE(review): VFMADD213SS belongs to the FMA instruction set extension and
+// nothing in this header checks that the target supports it - confirm that
+// the build guarantees it.
+template <typename T>
+static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
+                                                                     T z) {
+  // The instruction overwrites its destination operand, so the result is
+  // seeded with x and bound as a read-write ("+x") SSE register operand.
+  float result = x;
+  // In the AT&T operand order used here this computes %0 = %1 * %0 + %2,
+  // i.e. result = y * x + z.
+  __asm__ __volatile__("vfmadd213ss %x2, %x1, %x0"
+                       : "+x"(result)
+                       : "x"(y), "x"(z));
+  return result;
+}
+
+// Double precision fused multiply-add using the x86 VFMADD213SD instruction.
+// Returns the value the instruction produces for x * y + z.
+// NOTE(review): VFMADD213SD belongs to the FMA instruction set extension and
+// nothing in this header checks that the target supports it - confirm that
+// the build guarantees it.
+template <typename T>
+static inline cpp::EnableIfType<cpp::IsSame<T, double>::Value, T> fma(T x, T y,
+                                                                      T z) {
+  // The instruction overwrites its destination operand, so the result is
+  // seeded with x and bound as a read-write ("+x") SSE register operand.
+  double result = x;
+  // In the AT&T operand order used here this computes %0 = %1 * %0 + %2,
+  // i.e. result = y * x + z.
+  __asm__ __volatile__("vfmadd213sd %x2, %x1, %x0"
+                       : "+x"(result)
+                       : "x"(y), "x"(z));
+  return result;
+}
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_UTILS_FPUTIL_X86_64_FMA_H