forked from OSchip/llvm-project
[libc] Make expm1f correctly rounded when the targets have no FMA instructions.
Add another exceptional value and fix the case when |x| is small. Performance tests with CORE-MATH project scripts: With FMA instructions on Ryzen 1700: ``` $ ./perf.sh expm1f LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a CORE-MATH reciprocal throughput : 15.362 System LIBC reciprocal throughput : 53.194 LIBC reciprocal throughput : 14.595 $ ./perf.sh expm1f --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a CORE-MATH latency : 57.755 System LIBC latency : 147.020 LIBC latency : 60.269 ``` Without FMA instructions: ``` $ ./perf.sh expm1f LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a CORE-MATH reciprocal throughput : 15.362 System LIBC reciprocal throughput : 53.300 LIBC reciprocal throughput : 18.020 $ ./perf.sh expm1f --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a CORE-MATH latency : 57.758 System LIBC latency : 147.025 LIBC latency : 70.304 ``` Reviewed By: michaelrj Differential Revision: https://reviews.llvm.org/D123440
This commit is contained in:
parent
4fc502368a
commit
484319f497
|
@ -34,6 +34,15 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
|
|||
return 0x1.8dbe62p-3f;
|
||||
}
|
||||
|
||||
#if !defined(LIBC_TARGET_HAS_FMA)
|
||||
if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
|
||||
int round_mode = fputil::get_round();
|
||||
if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
|
||||
return -0x1.71c884p-4f;
|
||||
return -0x1.71c882p-4f;
|
||||
}
|
||||
#endif // LIBC_TARGET_HAS_FMA
|
||||
|
||||
// When |x| > 25*log(2), or nan
|
||||
if (unlikely(x_abs >= 0x418a'a123U)) {
|
||||
// x < log(2^-25)
|
||||
|
@ -70,19 +79,30 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
|
|||
// x = -0.0f
|
||||
if (unlikely(xbits.uintval() == 0x8000'0000U))
|
||||
return x;
|
||||
// When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
|
||||
// is:
|
||||
// |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
|
||||
// = |x|
|
||||
// < 2^-25
|
||||
// < epsilon(1)/2.
|
||||
// So the correctly rounded values of expm1(x) are:
|
||||
// = x + eps(x) if rounding mode = FE_UPWARD,
|
||||
// or (rounding mode = FE_TOWARDZERO and x is negative),
|
||||
// = x otherwise.
|
||||
// To simplify the rounding decision and make it more efficient, we use
|
||||
// fma(x, x, x) ~ x + x^2 instead.
|
||||
return fputil::multiply_add(x, x, x);
|
||||
// When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
|
||||
// is:
|
||||
// |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
|
||||
// = |x|
|
||||
// < 2^-25
|
||||
// < epsilon(1)/2.
|
||||
// So the correctly rounded values of expm1(x) are:
|
||||
// = x + eps(x) if rounding mode = FE_UPWARD,
|
||||
// or (rounding mode = FE_TOWARDZERO and x is
|
||||
// negative),
|
||||
// = x otherwise.
|
||||
// To simplify the rounding decision and make it more efficient, we use
|
||||
// fma(x, x, x) ~ x + x^2 instead.
|
||||
// Note: to use the formula x + x^2 to decide the correct rounding, we
|
||||
// do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
|
||||
// 2^-76. For targets without FMA instructions, we simply use double for
|
||||
// intermediate results as it is more efficient than using an emulated
|
||||
// version of FMA.
|
||||
#if defined(LIBC_TARGET_HAS_FMA)
|
||||
return fputil::fma(x, x, x);
|
||||
#else
|
||||
double xd = x;
|
||||
return static_cast<float>(fputil::multiply_add(xd, xd, xd));
|
||||
#endif // LIBC_TARGET_HAS_FMA
|
||||
}
|
||||
|
||||
// 2^-25 <= |x| < 2^-4
|
||||
|
|
|
@ -1189,9 +1189,6 @@ add_fp_unittest(
|
|||
libc.src.__support.FPUtil.fputil
|
||||
)
|
||||
|
||||
# Without FMA instructions, the current expm1f implementation is not correctly
|
||||
# rounded for all float inputs (1 extra exceptional value). This will be fixed
|
||||
# in the followup patch: https://reviews.llvm.org/D123440
|
||||
add_fp_unittest(
|
||||
expm1f_test
|
||||
NEED_MPFR
|
||||
|
@ -1204,8 +1201,6 @@ add_fp_unittest(
|
|||
libc.include.math
|
||||
libc.src.math.expm1f
|
||||
libc.src.__support.FPUtil.fputil
|
||||
FLAGS
|
||||
FMA_OPT__ONLY
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
|
|
|
@ -92,7 +92,6 @@ add_fp_unittest(
|
|||
DEPENDS
|
||||
.exhaustive_test
|
||||
libc.include.math
|
||||
libc.src.math.expf
|
||||
libc.src.math.expm1f
|
||||
libc.src.__support.FPUtil.fputil
|
||||
LINK_LIBRARIES
|
||||
|
|
|
@ -18,7 +18,7 @@ using FPBits = __llvm_libc::fputil::FPBits<float>;
|
|||
|
||||
namespace mpfr = __llvm_libc::testing::mpfr;
|
||||
|
||||
struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
|
||||
struct LlvmLibcExpm1fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
|
||||
bool check(uint32_t start, uint32_t stop,
|
||||
mpfr::RoundingMode rounding) override {
|
||||
mpfr::ForceRoundingMode r(rounding);
|
||||
|
@ -40,21 +40,21 @@ static const int NUM_THREADS = std::thread::hardware_concurrency();
|
|||
static constexpr uint32_t POS_START = 0x0000'0000U;
|
||||
static constexpr uint32_t POS_STOP = 0x42b2'0000U;
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
|
||||
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
||||
mpfr::RoundingMode::Nearest);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundUp) {
|
||||
test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundDown) {
|
||||
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
||||
mpfr::RoundingMode::Downward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundTowardZero) {
|
||||
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
||||
mpfr::RoundingMode::TowardZero);
|
||||
}
|
||||
|
@ -63,21 +63,21 @@ TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
|
|||
static constexpr uint32_t NEG_START = 0x8000'0000U;
|
||||
static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
|
||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
||||
mpfr::RoundingMode::Nearest);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundUp) {
|
||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundDown) {
|
||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
||||
mpfr::RoundingMode::Downward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) {
|
||||
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundTowardZero) {
|
||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
||||
mpfr::RoundingMode::TowardZero);
|
||||
}
|
||||
|
|
|
@ -97,6 +97,16 @@ TEST(LlvmLibcExpm1fTest, Borderline) {
|
|||
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
||||
__llvm_libc::expm1f(x), 0.5);
|
||||
EXPECT_MATH_ERRNO(0);
|
||||
|
||||
x = float(FPBits(0x942ed494U));
|
||||
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
||||
__llvm_libc::expm1f(x), 0.5);
|
||||
EXPECT_MATH_ERRNO(0);
|
||||
|
||||
x = float(FPBits(0xbdc1c6cbU));
|
||||
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
||||
__llvm_libc::expm1f(x), 0.5);
|
||||
EXPECT_MATH_ERRNO(0);
|
||||
}
|
||||
|
||||
TEST(LlvmLibcExpm1fTest, InFloatRange) {
|
||||
|
|
Loading…
Reference in New Issue