[libc] Make expm1f correctly rounded when the targets have no FMA instructions.

Add another exceptional value and fix the case when |x| is small.

Performance tests with CORE-MATH project scripts:
With FMA instructions on Ryzen 1700:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH reciprocal throughput   : 15.362
System LIBC reciprocal throughput : 53.194
LIBC reciprocal throughput        : 14.595
$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH latency   : 57.755
System LIBC latency : 147.020
LIBC latency        : 60.269
```
Without FMA instructions:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH reciprocal throughput   : 15.362
System LIBC reciprocal throughput : 53.300
LIBC reciprocal throughput        : 18.020
$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH latency   : 57.758
System LIBC latency : 147.025
LIBC latency        : 70.304
```

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D123440
This commit is contained in:
Tue Ly 2022-04-09 00:12:50 -04:00 committed by Tue Ly
parent 4fc502368a
commit 484319f497
5 changed files with 52 additions and 28 deletions

View File

@ -34,6 +34,15 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
return 0x1.8dbe62p-3f;
}
#if !defined(LIBC_TARGET_HAS_FMA)
if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
int round_mode = fputil::get_round();
if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
return -0x1.71c884p-4f;
return -0x1.71c882p-4f;
}
#endif // LIBC_TARGET_HAS_FMA
// When |x| > 25*log(2), or nan
if (unlikely(x_abs >= 0x418a'a123U)) {
// x < log(2^-25)
@ -70,19 +79,30 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
// x = -0.0f
if (unlikely(xbits.uintval() == 0x8000'0000U))
return x;
// When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
// is:
// |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
// = |x|
// < 2^-25
// < epsilon(1)/2.
// So the correctly rounded values of expm1(x) are:
// = x + eps(x) if rounding mode = FE_UPWARD,
// or (rounding mode = FE_TOWARDZERO and x is negative),
// = x otherwise.
// To simplify the rounding decision and make it more efficient, we use
// fma(x, x, x) ~ x + x^2 instead.
return fputil::multiply_add(x, x, x);
// When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
// is:
// |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
// = |x|
// < 2^-25
// < epsilon(1)/2.
// So the correctly rounded values of expm1(x) are:
// = x + eps(x) if rounding mode = FE_UPWARD,
// or (rounding mode = FE_TOWARDZERO and x is
// negative),
// = x otherwise.
// To simplify the rounding decision and make it more efficient, we use
// fma(x, x, x) ~ x + x^2 instead.
// Note: to use the formula x + x^2 to decide the correct rounding, we
// do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
// 2^-76. For targets without FMA instructions, we simply use double for
// intermediate results as it is more efficient than using an emulated
// version of FMA.
#if defined(LIBC_TARGET_HAS_FMA)
return fputil::fma(x, x, x);
#else
double xd = x;
return static_cast<float>(fputil::multiply_add(xd, xd, xd));
#endif // LIBC_TARGET_HAS_FMA
}
// 2^-25 <= |x| < 2^-4

View File

@ -1189,9 +1189,6 @@ add_fp_unittest(
libc.src.__support.FPUtil.fputil
)
# Without FMA instructions, the current expm1f implementation is not correctly
# rounded for all float inputs (1 extra exceptional value). This will be fixed
# in the followup patch: https://reviews.llvm.org/D123440
add_fp_unittest(
expm1f_test
NEED_MPFR
@ -1204,8 +1201,6 @@ add_fp_unittest(
libc.include.math
libc.src.math.expm1f
libc.src.__support.FPUtil.fputil
FLAGS
FMA_OPT__ONLY
)
add_fp_unittest(

View File

@ -92,7 +92,6 @@ add_fp_unittest(
DEPENDS
.exhaustive_test
libc.include.math
libc.src.math.expf
libc.src.math.expm1f
libc.src.__support.FPUtil.fputil
LINK_LIBRARIES

View File

@ -18,7 +18,7 @@ using FPBits = __llvm_libc::fputil::FPBits<float>;
namespace mpfr = __llvm_libc::testing::mpfr;
struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
struct LlvmLibcExpm1fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
bool check(uint32_t start, uint32_t stop,
mpfr::RoundingMode rounding) override {
mpfr::ForceRoundingMode r(rounding);
@ -40,21 +40,21 @@ static const int NUM_THREADS = std::thread::hardware_concurrency();
static constexpr uint32_t POS_START = 0x0000'0000U;
static constexpr uint32_t POS_STOP = 0x42b2'0000U;
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::Nearest);
}
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundUp) {
test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundDown) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::Downward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundTowardZero) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::TowardZero);
}
@ -63,21 +63,21 @@ TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
static constexpr uint32_t NEG_START = 0x8000'0000U;
static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::Nearest);
}
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundUp) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundDown) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::Downward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) {
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundTowardZero) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::TowardZero);
}

View File

@ -97,6 +97,16 @@ TEST(LlvmLibcExpm1fTest, Borderline) {
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
x = float(FPBits(0x942ed494U));
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
x = float(FPBits(0xbdc1c6cbU));
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
}
TEST(LlvmLibcExpm1fTest, InFloatRange) {