forked from OSchip/llvm-project
[libc] Make expm1f correctly rounded when the targets have no FMA instructions.
Add another exceptional value and fix the case when |x| is small.

Performance tests with CORE-MATH project scripts:

With FMA instructions on Ryzen 1700:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH reciprocal throughput   : 15.362
System LIBC reciprocal throughput : 53.194
LIBC reciprocal throughput        : 14.595
$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH latency   : 57.755
System LIBC latency : 147.020
LIBC latency        : 60.269
```

Without FMA instructions:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH reciprocal throughput   : 15.362
System LIBC reciprocal throughput : 53.300
LIBC reciprocal throughput        : 18.020
$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH latency   : 57.758
System LIBC latency : 147.025
LIBC latency        : 70.304
```

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D123440
This commit is contained in:
parent
4fc502368a
commit
484319f497
|
@ -34,6 +34,15 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
|
||||||
return 0x1.8dbe62p-3f;
|
return 0x1.8dbe62p-3f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined(LIBC_TARGET_HAS_FMA)
|
||||||
|
if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
|
||||||
|
int round_mode = fputil::get_round();
|
||||||
|
if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
|
||||||
|
return -0x1.71c884p-4f;
|
||||||
|
return -0x1.71c882p-4f;
|
||||||
|
}
|
||||||
|
#endif // LIBC_TARGET_HAS_FMA
|
||||||
|
|
||||||
// When |x| > 25*log(2), or nan
|
// When |x| > 25*log(2), or nan
|
||||||
if (unlikely(x_abs >= 0x418a'a123U)) {
|
if (unlikely(x_abs >= 0x418a'a123U)) {
|
||||||
// x < log(2^-25)
|
// x < log(2^-25)
|
||||||
|
@ -78,11 +87,22 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
|
||||||
// < epsilon(1)/2.
|
// < epsilon(1)/2.
|
||||||
// So the correctly rounded values of expm1(x) are:
|
// So the correctly rounded values of expm1(x) are:
|
||||||
// = x + eps(x) if rounding mode = FE_UPWARD,
|
// = x + eps(x) if rounding mode = FE_UPWARD,
|
||||||
// or (rounding mode = FE_TOWARDZERO and x is negative),
|
// or (rounding mode = FE_TOWARDZERO and x is
|
||||||
|
// negative),
|
||||||
// = x otherwise.
|
// = x otherwise.
|
||||||
// To simplify the rounding decision and make it more efficient, we use
|
// To simplify the rounding decision and make it more efficient, we use
|
||||||
// fma(x, x, x) ~ x + x^2 instead.
|
// fma(x, x, x) ~ x + x^2 instead.
|
||||||
return fputil::multiply_add(x, x, x);
|
// Note: to use the formula x + x^2 to decide the correct rounding, we
|
||||||
|
// do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
|
||||||
|
// 2^-76. For targets without FMA instructions, we simply use double for
|
||||||
|
// intermediate results as it is more efficient than using an emulated
|
||||||
|
// version of FMA.
|
||||||
|
#if defined(LIBC_TARGET_HAS_FMA)
|
||||||
|
return fputil::fma(x, x, x);
|
||||||
|
#else
|
||||||
|
double xd = x;
|
||||||
|
return static_cast<float>(fputil::multiply_add(xd, xd, xd));
|
||||||
|
#endif // LIBC_TARGET_HAS_FMA
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2^-25 <= |x| < 2^-4
|
// 2^-25 <= |x| < 2^-4
|
||||||
|
|
|
@ -1189,9 +1189,6 @@ add_fp_unittest(
|
||||||
libc.src.__support.FPUtil.fputil
|
libc.src.__support.FPUtil.fputil
|
||||||
)
|
)
|
||||||
|
|
||||||
# Without FMA instructions, the current expm1f implementation is not correctly
|
|
||||||
# rounded for all float inputs (1 extra exceptional value). This will be fixed
|
|
||||||
# in the followup patch: https://reviews.llvm.org/D123440
|
|
||||||
add_fp_unittest(
|
add_fp_unittest(
|
||||||
expm1f_test
|
expm1f_test
|
||||||
NEED_MPFR
|
NEED_MPFR
|
||||||
|
@ -1204,8 +1201,6 @@ add_fp_unittest(
|
||||||
libc.include.math
|
libc.include.math
|
||||||
libc.src.math.expm1f
|
libc.src.math.expm1f
|
||||||
libc.src.__support.FPUtil.fputil
|
libc.src.__support.FPUtil.fputil
|
||||||
FLAGS
|
|
||||||
FMA_OPT__ONLY
|
|
||||||
)
|
)
|
||||||
|
|
||||||
add_fp_unittest(
|
add_fp_unittest(
|
||||||
|
|
|
@ -92,7 +92,6 @@ add_fp_unittest(
|
||||||
DEPENDS
|
DEPENDS
|
||||||
.exhaustive_test
|
.exhaustive_test
|
||||||
libc.include.math
|
libc.include.math
|
||||||
libc.src.math.expf
|
|
||||||
libc.src.math.expm1f
|
libc.src.math.expm1f
|
||||||
libc.src.__support.FPUtil.fputil
|
libc.src.__support.FPUtil.fputil
|
||||||
LINK_LIBRARIES
|
LINK_LIBRARIES
|
||||||
|
|
|
@ -18,7 +18,7 @@ using FPBits = __llvm_libc::fputil::FPBits<float>;
|
||||||
|
|
||||||
namespace mpfr = __llvm_libc::testing::mpfr;
|
namespace mpfr = __llvm_libc::testing::mpfr;
|
||||||
|
|
||||||
struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
|
struct LlvmLibcExpm1fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
|
||||||
bool check(uint32_t start, uint32_t stop,
|
bool check(uint32_t start, uint32_t stop,
|
||||||
mpfr::RoundingMode rounding) override {
|
mpfr::RoundingMode rounding) override {
|
||||||
mpfr::ForceRoundingMode r(rounding);
|
mpfr::ForceRoundingMode r(rounding);
|
||||||
|
@ -40,21 +40,21 @@ static const int NUM_THREADS = std::thread::hardware_concurrency();
|
||||||
static constexpr uint32_t POS_START = 0x0000'0000U;
|
static constexpr uint32_t POS_START = 0x0000'0000U;
|
||||||
static constexpr uint32_t POS_STOP = 0x42b2'0000U;
|
static constexpr uint32_t POS_STOP = 0x42b2'0000U;
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
|
||||||
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
||||||
mpfr::RoundingMode::Nearest);
|
mpfr::RoundingMode::Nearest);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundUp) {
|
||||||
test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
|
test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundDown) {
|
||||||
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
||||||
mpfr::RoundingMode::Downward);
|
mpfr::RoundingMode::Downward);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundTowardZero) {
|
||||||
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
test_full_range(POS_START, POS_STOP, NUM_THREADS,
|
||||||
mpfr::RoundingMode::TowardZero);
|
mpfr::RoundingMode::TowardZero);
|
||||||
}
|
}
|
||||||
|
@ -63,21 +63,21 @@ TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
|
||||||
static constexpr uint32_t NEG_START = 0x8000'0000U;
|
static constexpr uint32_t NEG_START = 0x8000'0000U;
|
||||||
static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
|
static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
|
||||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
||||||
mpfr::RoundingMode::Nearest);
|
mpfr::RoundingMode::Nearest);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundUp) {
|
||||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
|
test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundDown) {
|
||||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
||||||
mpfr::RoundingMode::Downward);
|
mpfr::RoundingMode::Downward);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) {
|
TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundTowardZero) {
|
||||||
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
|
||||||
mpfr::RoundingMode::TowardZero);
|
mpfr::RoundingMode::TowardZero);
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,6 +97,16 @@ TEST(LlvmLibcExpm1fTest, Borderline) {
|
||||||
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
||||||
__llvm_libc::expm1f(x), 0.5);
|
__llvm_libc::expm1f(x), 0.5);
|
||||||
EXPECT_MATH_ERRNO(0);
|
EXPECT_MATH_ERRNO(0);
|
||||||
|
|
||||||
|
x = float(FPBits(0x942ed494U));
|
||||||
|
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
||||||
|
__llvm_libc::expm1f(x), 0.5);
|
||||||
|
EXPECT_MATH_ERRNO(0);
|
||||||
|
|
||||||
|
x = float(FPBits(0xbdc1c6cbU));
|
||||||
|
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
|
||||||
|
__llvm_libc::expm1f(x), 0.5);
|
||||||
|
EXPECT_MATH_ERRNO(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(LlvmLibcExpm1fTest, InFloatRange) {
|
TEST(LlvmLibcExpm1fTest, InFloatRange) {
|
||||||
|
|
Loading…
Reference in New Issue