[libc] Implement expm1f function that is correctly rounded for all rounding modes.

Implement expm1f function that is correctly rounded for all rounding modes.  This is based on expf implementation.

From exhaustive testings, using expf implementation, and subtract 1.0 before rounding the final result to single precision
gives correctly rounded results for all |x| > 2^-4 with 1 exception.  When |x| < 2^-25, we use x + x^2 (implemented with a
single fma).  And for 2^-25 <= |x| <= 2^-4, we use a single degree-8 minimax polynomial generated by Sollya.

Reviewed By: sivachandra, zimmermann6

Differential Revision: https://reviews.llvm.org/D121574
This commit is contained in:
Tue Ly 2022-03-14 09:43:33 -04:00
parent 7e4cf582cf
commit 64af346b18
10 changed files with 353 additions and 188 deletions

View File

@ -476,6 +476,7 @@ add_entrypoint_object(
HDRS
../expf.h
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
libc.include.math
COMPILE_OPTIONS
@ -502,9 +503,11 @@ add_entrypoint_object(
HDRS
../expm1f.h
DEPENDS
.common_constants
libc.src.__support.FPUtil.fputil
libc.include.math
libc.src.math.expf
libc.src.math.fabsf
COMPILE_OPTIONS
-O3
)
add_entrypoint_object(

View File

@ -102,4 +102,128 @@ const double LOG_F[128] = {
0x1.58cadb5cd7989p-1, 0x1.5ad404c359f2cp-1, 0x1.5cdb1dc6c1764p-1,
0x1.5ee02a9241675p-1, 0x1.60e32f44788d8p-1};
// Lookup table for exp(m) with m = -104, ..., 89.
// -104 = floor(log(single precision's min denormal))
// 89 = ceil(log(single precision's max normal))
// Table is generated with Sollya as follow:
// > display = hexadecimal;
// > for i from -104 to 89 do { D(exp(i)); };
const double EXP_M1[195] = {
0x1.f1e6b68529e33p-151, 0x1.525be4e4e601dp-149, 0x1.cbe0a45f75eb1p-148,
0x1.3884e838aea68p-146, 0x1.a8c1f14e2af5dp-145, 0x1.20a717e64a9bdp-143,
0x1.8851d84118908p-142, 0x1.0a9bdfb02d240p-140, 0x1.6a5bea046b42ep-139,
0x1.ec7f3b269efa8p-138, 0x1.4eafb87eab0f2p-136, 0x1.c6e2d05bbc000p-135,
0x1.35208867c2683p-133, 0x1.a425b317eeacdp-132, 0x1.1d8508fa8246ap-130,
0x1.840fbc08fdc8ap-129, 0x1.07b7112bc1ffep-127, 0x1.666d0dad2961dp-126,
0x1.e726c3f64d0fep-125, 0x1.4b0dc07cabf98p-123, 0x1.c1f2daf3b6a46p-122,
0x1.31c5957a47de2p-120, 0x1.9f96445648b9fp-119, 0x1.1a6baeadb4fd1p-117,
0x1.7fd974d372e45p-116, 0x1.04da4d1452919p-114, 0x1.62891f06b3450p-113,
0x1.e1dd273aa8a4ap-112, 0x1.4775e0840bfddp-110, 0x1.bd109d9d94bdap-109,
0x1.2e73f53fba844p-107, 0x1.9b138170d6bfep-106, 0x1.175af0cf60ec5p-104,
0x1.7baee1bffa80bp-103, 0x1.02057d1245cebp-101, 0x1.5eafffb34ba31p-100,
0x1.dca23bae16424p-99, 0x1.43e7fc88b8056p-97, 0x1.b83bf23a9a9ebp-96,
0x1.2b2b8dd05b318p-94, 0x1.969d47321e4ccp-93, 0x1.1452b7723aed2p-91,
0x1.778fe2497184cp-90, 0x1.fe7116182e9ccp-89, 0x1.5ae191a99585ap-87,
0x1.d775d87da854dp-86, 0x1.4063f8cc8bb98p-84, 0x1.b374b315f87c1p-83,
0x1.27ec458c65e3cp-81, 0x1.923372c67a074p-80, 0x1.1152eaeb73c08p-78,
0x1.737c5645114b5p-77, 0x1.f8e6c24b5592ep-76, 0x1.571db733a9d61p-74,
0x1.d257d547e083fp-73, 0x1.3ce9b9de78f85p-71, 0x1.aebabae3a41b5p-70,
0x1.24b6031b49bdap-68, 0x1.8dd5e1bb09d7ep-67, 0x1.0e5b73d1ff53dp-65,
0x1.6f741de1748ecp-64, 0x1.f36bd37f42f3ep-63, 0x1.536452ee2f75cp-61,
0x1.cd480a1b74820p-60, 0x1.39792499b1a24p-58, 0x1.aa0de4bf35b38p-57,
0x1.2188ad6ae3303p-55, 0x1.898471fca6055p-54, 0x1.0b6c3afdde064p-52,
0x1.6b7719a59f0e0p-51, 0x1.ee001eed62aa0p-50, 0x1.4fb547c775da8p-48,
0x1.c8464f7616468p-47, 0x1.36121e24d3bbap-45, 0x1.a56e0c2ac7f75p-44,
0x1.1e642baeb84a0p-42, 0x1.853f01d6d53bap-41, 0x1.0885298767e9ap-39,
0x1.67852a7007e42p-38, 0x1.e8a37a45fc32ep-37, 0x1.4c1078fe9228ap-35,
0x1.c3527e433fab1p-34, 0x1.32b48bf117da2p-32, 0x1.a0db0d0ddb3ecp-31,
0x1.1b48655f37267p-29, 0x1.81056ff2c5772p-28, 0x1.05a628c699fa1p-26,
0x1.639e3175a689dp-25, 0x1.e355bbaee85cbp-24, 0x1.4875ca227ec38p-22,
0x1.be6c6fdb01612p-21, 0x1.2f6053b981d98p-19, 0x1.9c54c3b43bc8bp-18,
0x1.18354238f6764p-16, 0x1.7cd79b5647c9bp-15, 0x1.02cf22526545ap-13,
0x1.5fc21041027adp-12, 0x1.de16b9c24a98fp-11, 0x1.44e51f113d4d6p-9,
0x1.b993fe00d5376p-8, 0x1.2c155b8213cf4p-6, 0x1.97db0ccceb0afp-5,
0x1.152aaa3bf81ccp-3, 0x1.78b56362cef38p-2, 0x1.0000000000000p+0,
0x1.5bf0a8b145769p+1, 0x1.d8e64b8d4ddaep+2, 0x1.415e5bf6fb106p+4,
0x1.b4c902e273a58p+5, 0x1.28d389970338fp+7, 0x1.936dc5690c08fp+8,
0x1.122885aaeddaap+10, 0x1.749ea7d470c6ep+11, 0x1.fa7157c470f82p+12,
0x1.5829dcf950560p+14, 0x1.d3c4488ee4f7fp+15, 0x1.3de1654d37c9ap+17,
0x1.b00b5916ac955p+18, 0x1.259ac48bf05d7p+20, 0x1.8f0ccafad2a87p+21,
0x1.0f2ebd0a80020p+23, 0x1.709348c0ea4f9p+24, 0x1.f4f22091940bdp+25,
0x1.546d8f9ed26e1p+27, 0x1.ceb088b68e804p+28, 0x1.3a6e1fd9eecfdp+30,
0x1.ab5adb9c43600p+31, 0x1.226af33b1fdc1p+33, 0x1.8ab7fb5475fb7p+34,
0x1.0c3d3920962c9p+36, 0x1.6c932696a6b5dp+37, 0x1.ef822f7f6731dp+38,
0x1.50bba3796379ap+40, 0x1.c9aae4631c056p+41, 0x1.370470aec28edp+43,
0x1.a6b765d8cdf6dp+44, 0x1.1f43fcc4b662cp+46, 0x1.866f34a725782p+47,
0x1.0953e2f3a1ef7p+49, 0x1.689e221bc8d5bp+50, 0x1.ea215a1d20d76p+51,
0x1.4d13fbb1a001ap+53, 0x1.c4b334617cc67p+54, 0x1.33a43d282a519p+56,
0x1.a220d397972ebp+57, 0x1.1c25c88df6862p+59, 0x1.8232558201159p+60,
0x1.0672a3c9eb871p+62, 0x1.64b41c6d37832p+63, 0x1.e4cf766fe49bep+64,
0x1.49767bc0483e3p+66, 0x1.bfc951eb8bb76p+67, 0x1.304d6aeca254bp+69,
0x1.9d97010884251p+70, 0x1.19103e4080b45p+72, 0x1.7e013cd114461p+73,
0x1.03996528e074cp+75, 0x1.60d4f6fdac731p+76, 0x1.df8c5af17ba3bp+77,
0x1.45e3076d61699p+79, 0x1.baed16a6e0da7p+80, 0x1.2cffdfebde1a1p+82,
0x1.9919cabefcb69p+83, 0x1.160345c9953e3p+85, 0x1.79dbc9dc53c66p+86,
0x1.00c810d464097p+88, 0x1.5d009394c5c27p+89, 0x1.da57de8f107a8p+90,
0x1.425982cf597cdp+92, 0x1.b61e5ca3a5e31p+93, 0x1.29bb825dfcf87p+95,
0x1.94a90db0d6fe2p+96, 0x1.12fec759586fdp+98, 0x1.75c1dc469e3afp+99,
0x1.fbfd219c43b04p+100, 0x1.5936d44e1a146p+102, 0x1.d531d8a7ee79cp+103,
0x1.3ed9d24a2d51bp+105, 0x1.b15cfe5b6e17bp+106, 0x1.268038c2c0e00p+108,
0x1.9044a73545d48p+109, 0x1.1002ab6218b38p+111, 0x1.71b3540cbf921p+112,
0x1.f6799ea9c414ap+113, 0x1.55779b984f3ebp+115, 0x1.d01a210c44aa4p+116,
0x1.3b63da8e91210p+118, 0x1.aca8d6b0116b8p+119, 0x1.234de9e0c74e9p+121,
0x1.8bec7503ca477p+122, 0x1.0d0eda9796b90p+124, 0x1.6db0118477245p+125,
0x1.f1056dc7bf22dp+126, 0x1.51c2cc3433801p+128, 0x1.cb108ffbec164p+129,
};
// Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127.
// Table is generated with Sollya as follow:
// > display = hexadecimal;
// > for i from 0 to 127 do { D(exp(i / 128)); };
const double EXP_M2[128] = {
0x1.0000000000000p0, 0x1.0202015600446p0, 0x1.04080ab55de39p0,
0x1.06122436410ddp0, 0x1.08205601127edp0, 0x1.0a32a84e9c1f6p0,
0x1.0c49236829e8cp0, 0x1.0e63cfa7ab09dp0, 0x1.1082b577d34edp0,
0x1.12a5dd543ccc5p0, 0x1.14cd4fc989cd6p0, 0x1.16f9157587069p0,
0x1.192937074e0cdp0, 0x1.1b5dbd3f68122p0, 0x1.1d96b0eff0e79p0,
0x1.1fd41afcba45ep0, 0x1.2216045b6f5cdp0, 0x1.245c7613b8a9bp0,
0x1.26a7793f60164p0, 0x1.28f7170a755fdp0, 0x1.2b4b58b372c79p0,
0x1.2da4478b620c7p0, 0x1.3001ecf601af7p0, 0x1.32645269ea829p0,
0x1.34cb8170b5835p0, 0x1.373783a722012p0, 0x1.39a862bd3c106p0,
0x1.3c1e2876834aap0, 0x1.3e98deaa11dccp0, 0x1.41188f42c3e32p0,
0x1.439d443f5f159p0, 0x1.462707b2bac21p0, 0x1.48b5e3c3e8186p0,
0x1.4b49e2ae5ac67p0, 0x1.4de30ec211e60p0, 0x1.50817263c13cdp0,
0x1.5325180cfacf7p0, 0x1.55ce0a4c58c7cp0, 0x1.587c53c5a7af0p0,
0x1.5b2fff3210fd9p0, 0x1.5de9176045ff5p0, 0x1.60a7a734ab0e8p0,
0x1.636bb9a983258p0, 0x1.663559cf1bc7cp0, 0x1.690492cbf9433p0,
0x1.6bd96fdd034a2p0, 0x1.6eb3fc55b1e76p0, 0x1.719443a03acb9p0,
0x1.747a513dbef6ap0, 0x1.776630c678bc1p0, 0x1.7a57ede9ea23ep0,
0x1.7d4f946f0ba8dp0, 0x1.804d30347b546p0, 0x1.8350cd30ac390p0,
0x1.865a7772164c5p0, 0x1.896a3b1f66a0ep0, 0x1.8c802477b0010p0,
0x1.8f9c3fd29beafp0, 0x1.92be99a09bf00p0, 0x1.95e73e6b1b75ep0,
0x1.99163ad4b1dccp0, 0x1.9c4b9b995509bp0, 0x1.9f876d8e8c566p0,
0x1.a2c9bda3a3e78p0, 0x1.a61298e1e069cp0, 0x1.a9620c6cb3374p0,
0x1.acb82581eee54p0, 0x1.b014f179fc3b8p0, 0x1.b3787dc80f95fp0,
0x1.b6e2d7fa5eb18p0, 0x1.ba540dba56e56p0, 0x1.bdcc2cccd3c85p0,
0x1.c14b431256446p0, 0x1.c4d15e873c193p0, 0x1.c85e8d43f7cd0p0,
0x1.cbf2dd7d490f2p0, 0x1.cf8e5d84758a9p0, 0x1.d3311bc7822b4p0,
0x1.d6db26d16cd67p0, 0x1.da8c8d4a66969p0, 0x1.de455df80e3c0p0,
0x1.e205a7bdab73ep0, 0x1.e5cd799c6a54ep0, 0x1.e99ce2b397649p0,
0x1.ed73f240dc142p0, 0x1.f152b7a07bb76p0, 0x1.f539424d90f5ep0,
0x1.f927a1e24bb76p0, 0x1.fd1de6182f8c9p0, 0x1.008e0f64294abp1,
0x1.02912df5ce72ap1, 0x1.049856cd84339p1, 0x1.06a39207f0a09p1,
0x1.08b2e7d2035cfp1, 0x1.0ac6606916501p1, 0x1.0cde041b0e9aep1,
0x1.0ef9db467dcf8p1, 0x1.1119ee5ac36b6p1, 0x1.133e45d82e952p1,
0x1.1566ea50201d7p1, 0x1.1793e4652cc50p1, 0x1.19c53ccb3fc6bp1,
0x1.1bfafc47bda73p1, 0x1.1e352bb1a74adp1, 0x1.2073d3f1bd518p1,
0x1.22b6fe02a3b9cp1, 0x1.24feb2f105cb8p1, 0x1.274afbdbba4a6p1,
0x1.299be1f3e7f1cp1, 0x1.2bf16e7d2a38cp1, 0x1.2e4baacdb6614p1,
0x1.30aaa04e80d05p1, 0x1.330e587b62b28p1, 0x1.3576dce33feadp1,
0x1.37e437282d4eep1, 0x1.3a5670ff972edp1, 0x1.3ccd9432682b4p1,
0x1.3f49aa9d30590p1, 0x1.41cabe304cb34p1, 0x1.4450d8f00edd4p1,
0x1.46dc04f4e5338p1, 0x1.496c4c6b832dap1, 0x1.4c01b9950a111p1,
0x1.4e9c56c731f5dp1, 0x1.513c2e6c731d7p1, 0x1.53e14b042f9cap1,
0x1.568bb722dd593p1, 0x1.593b7d72305bbp1,
};
} // namespace __llvm_libc

View File

@ -17,6 +17,20 @@ extern const double ONE_OVER_F[128];
// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127.
extern const double LOG_F[128];
// Lookup table for exp(m) with m = -104, ..., 89.
// -104 = floor(log(single precision's min denormal))
// 89 = ceil(log(single precision's max normal))
// Table is generated with Sollya as follow:
// > display = hexadecimal;
// > for i from -104 to 89 do { D(exp(i)); };
extern const double EXP_M1[195];
// Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127.
// Table is generated with Sollya as follow:
// > display = hexadecimal;
// > for i from 0 to 127 do { D(exp(i / 128)); };
extern const double EXP_M2[128];
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "src/math/expf.h"
#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FMA.h"
@ -18,130 +19,6 @@
namespace __llvm_libc {
// Lookup table for exp(m) with m = -104, ..., 89.
// -104 = floor(log(single precision's min denormal))
// 89 = ceil(log(single precision's max normal))
// Table is generated with Sollya as follow:
// > display = hexadecimal;
// > for i from -104 to 89 do { D(exp(i)); };
static constexpr double EXP_M1[195] = {
0x1.f1e6b68529e33p-151, 0x1.525be4e4e601dp-149, 0x1.cbe0a45f75eb1p-148,
0x1.3884e838aea68p-146, 0x1.a8c1f14e2af5dp-145, 0x1.20a717e64a9bdp-143,
0x1.8851d84118908p-142, 0x1.0a9bdfb02d240p-140, 0x1.6a5bea046b42ep-139,
0x1.ec7f3b269efa8p-138, 0x1.4eafb87eab0f2p-136, 0x1.c6e2d05bbc000p-135,
0x1.35208867c2683p-133, 0x1.a425b317eeacdp-132, 0x1.1d8508fa8246ap-130,
0x1.840fbc08fdc8ap-129, 0x1.07b7112bc1ffep-127, 0x1.666d0dad2961dp-126,
0x1.e726c3f64d0fep-125, 0x1.4b0dc07cabf98p-123, 0x1.c1f2daf3b6a46p-122,
0x1.31c5957a47de2p-120, 0x1.9f96445648b9fp-119, 0x1.1a6baeadb4fd1p-117,
0x1.7fd974d372e45p-116, 0x1.04da4d1452919p-114, 0x1.62891f06b3450p-113,
0x1.e1dd273aa8a4ap-112, 0x1.4775e0840bfddp-110, 0x1.bd109d9d94bdap-109,
0x1.2e73f53fba844p-107, 0x1.9b138170d6bfep-106, 0x1.175af0cf60ec5p-104,
0x1.7baee1bffa80bp-103, 0x1.02057d1245cebp-101, 0x1.5eafffb34ba31p-100,
0x1.dca23bae16424p-99, 0x1.43e7fc88b8056p-97, 0x1.b83bf23a9a9ebp-96,
0x1.2b2b8dd05b318p-94, 0x1.969d47321e4ccp-93, 0x1.1452b7723aed2p-91,
0x1.778fe2497184cp-90, 0x1.fe7116182e9ccp-89, 0x1.5ae191a99585ap-87,
0x1.d775d87da854dp-86, 0x1.4063f8cc8bb98p-84, 0x1.b374b315f87c1p-83,
0x1.27ec458c65e3cp-81, 0x1.923372c67a074p-80, 0x1.1152eaeb73c08p-78,
0x1.737c5645114b5p-77, 0x1.f8e6c24b5592ep-76, 0x1.571db733a9d61p-74,
0x1.d257d547e083fp-73, 0x1.3ce9b9de78f85p-71, 0x1.aebabae3a41b5p-70,
0x1.24b6031b49bdap-68, 0x1.8dd5e1bb09d7ep-67, 0x1.0e5b73d1ff53dp-65,
0x1.6f741de1748ecp-64, 0x1.f36bd37f42f3ep-63, 0x1.536452ee2f75cp-61,
0x1.cd480a1b74820p-60, 0x1.39792499b1a24p-58, 0x1.aa0de4bf35b38p-57,
0x1.2188ad6ae3303p-55, 0x1.898471fca6055p-54, 0x1.0b6c3afdde064p-52,
0x1.6b7719a59f0e0p-51, 0x1.ee001eed62aa0p-50, 0x1.4fb547c775da8p-48,
0x1.c8464f7616468p-47, 0x1.36121e24d3bbap-45, 0x1.a56e0c2ac7f75p-44,
0x1.1e642baeb84a0p-42, 0x1.853f01d6d53bap-41, 0x1.0885298767e9ap-39,
0x1.67852a7007e42p-38, 0x1.e8a37a45fc32ep-37, 0x1.4c1078fe9228ap-35,
0x1.c3527e433fab1p-34, 0x1.32b48bf117da2p-32, 0x1.a0db0d0ddb3ecp-31,
0x1.1b48655f37267p-29, 0x1.81056ff2c5772p-28, 0x1.05a628c699fa1p-26,
0x1.639e3175a689dp-25, 0x1.e355bbaee85cbp-24, 0x1.4875ca227ec38p-22,
0x1.be6c6fdb01612p-21, 0x1.2f6053b981d98p-19, 0x1.9c54c3b43bc8bp-18,
0x1.18354238f6764p-16, 0x1.7cd79b5647c9bp-15, 0x1.02cf22526545ap-13,
0x1.5fc21041027adp-12, 0x1.de16b9c24a98fp-11, 0x1.44e51f113d4d6p-9,
0x1.b993fe00d5376p-8, 0x1.2c155b8213cf4p-6, 0x1.97db0ccceb0afp-5,
0x1.152aaa3bf81ccp-3, 0x1.78b56362cef38p-2, 0x1.0000000000000p+0,
0x1.5bf0a8b145769p+1, 0x1.d8e64b8d4ddaep+2, 0x1.415e5bf6fb106p+4,
0x1.b4c902e273a58p+5, 0x1.28d389970338fp+7, 0x1.936dc5690c08fp+8,
0x1.122885aaeddaap+10, 0x1.749ea7d470c6ep+11, 0x1.fa7157c470f82p+12,
0x1.5829dcf950560p+14, 0x1.d3c4488ee4f7fp+15, 0x1.3de1654d37c9ap+17,
0x1.b00b5916ac955p+18, 0x1.259ac48bf05d7p+20, 0x1.8f0ccafad2a87p+21,
0x1.0f2ebd0a80020p+23, 0x1.709348c0ea4f9p+24, 0x1.f4f22091940bdp+25,
0x1.546d8f9ed26e1p+27, 0x1.ceb088b68e804p+28, 0x1.3a6e1fd9eecfdp+30,
0x1.ab5adb9c43600p+31, 0x1.226af33b1fdc1p+33, 0x1.8ab7fb5475fb7p+34,
0x1.0c3d3920962c9p+36, 0x1.6c932696a6b5dp+37, 0x1.ef822f7f6731dp+38,
0x1.50bba3796379ap+40, 0x1.c9aae4631c056p+41, 0x1.370470aec28edp+43,
0x1.a6b765d8cdf6dp+44, 0x1.1f43fcc4b662cp+46, 0x1.866f34a725782p+47,
0x1.0953e2f3a1ef7p+49, 0x1.689e221bc8d5bp+50, 0x1.ea215a1d20d76p+51,
0x1.4d13fbb1a001ap+53, 0x1.c4b334617cc67p+54, 0x1.33a43d282a519p+56,
0x1.a220d397972ebp+57, 0x1.1c25c88df6862p+59, 0x1.8232558201159p+60,
0x1.0672a3c9eb871p+62, 0x1.64b41c6d37832p+63, 0x1.e4cf766fe49bep+64,
0x1.49767bc0483e3p+66, 0x1.bfc951eb8bb76p+67, 0x1.304d6aeca254bp+69,
0x1.9d97010884251p+70, 0x1.19103e4080b45p+72, 0x1.7e013cd114461p+73,
0x1.03996528e074cp+75, 0x1.60d4f6fdac731p+76, 0x1.df8c5af17ba3bp+77,
0x1.45e3076d61699p+79, 0x1.baed16a6e0da7p+80, 0x1.2cffdfebde1a1p+82,
0x1.9919cabefcb69p+83, 0x1.160345c9953e3p+85, 0x1.79dbc9dc53c66p+86,
0x1.00c810d464097p+88, 0x1.5d009394c5c27p+89, 0x1.da57de8f107a8p+90,
0x1.425982cf597cdp+92, 0x1.b61e5ca3a5e31p+93, 0x1.29bb825dfcf87p+95,
0x1.94a90db0d6fe2p+96, 0x1.12fec759586fdp+98, 0x1.75c1dc469e3afp+99,
0x1.fbfd219c43b04p+100, 0x1.5936d44e1a146p+102, 0x1.d531d8a7ee79cp+103,
0x1.3ed9d24a2d51bp+105, 0x1.b15cfe5b6e17bp+106, 0x1.268038c2c0e00p+108,
0x1.9044a73545d48p+109, 0x1.1002ab6218b38p+111, 0x1.71b3540cbf921p+112,
0x1.f6799ea9c414ap+113, 0x1.55779b984f3ebp+115, 0x1.d01a210c44aa4p+116,
0x1.3b63da8e91210p+118, 0x1.aca8d6b0116b8p+119, 0x1.234de9e0c74e9p+121,
0x1.8bec7503ca477p+122, 0x1.0d0eda9796b90p+124, 0x1.6db0118477245p+125,
0x1.f1056dc7bf22dp+126, 0x1.51c2cc3433801p+128, 0x1.cb108ffbec164p+129,
};
// Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127.
// Table is generated with Sollya as follow:
// > display = hexadecimal;
// > for i from 0 to 127 do { D(exp(i / 128)); };
static constexpr double EXP_M2[128] = {
0x1.0000000000000p0, 0x1.0202015600446p0, 0x1.04080ab55de39p0,
0x1.06122436410ddp0, 0x1.08205601127edp0, 0x1.0a32a84e9c1f6p0,
0x1.0c49236829e8cp0, 0x1.0e63cfa7ab09dp0, 0x1.1082b577d34edp0,
0x1.12a5dd543ccc5p0, 0x1.14cd4fc989cd6p0, 0x1.16f9157587069p0,
0x1.192937074e0cdp0, 0x1.1b5dbd3f68122p0, 0x1.1d96b0eff0e79p0,
0x1.1fd41afcba45ep0, 0x1.2216045b6f5cdp0, 0x1.245c7613b8a9bp0,
0x1.26a7793f60164p0, 0x1.28f7170a755fdp0, 0x1.2b4b58b372c79p0,
0x1.2da4478b620c7p0, 0x1.3001ecf601af7p0, 0x1.32645269ea829p0,
0x1.34cb8170b5835p0, 0x1.373783a722012p0, 0x1.39a862bd3c106p0,
0x1.3c1e2876834aap0, 0x1.3e98deaa11dccp0, 0x1.41188f42c3e32p0,
0x1.439d443f5f159p0, 0x1.462707b2bac21p0, 0x1.48b5e3c3e8186p0,
0x1.4b49e2ae5ac67p0, 0x1.4de30ec211e60p0, 0x1.50817263c13cdp0,
0x1.5325180cfacf7p0, 0x1.55ce0a4c58c7cp0, 0x1.587c53c5a7af0p0,
0x1.5b2fff3210fd9p0, 0x1.5de9176045ff5p0, 0x1.60a7a734ab0e8p0,
0x1.636bb9a983258p0, 0x1.663559cf1bc7cp0, 0x1.690492cbf9433p0,
0x1.6bd96fdd034a2p0, 0x1.6eb3fc55b1e76p0, 0x1.719443a03acb9p0,
0x1.747a513dbef6ap0, 0x1.776630c678bc1p0, 0x1.7a57ede9ea23ep0,
0x1.7d4f946f0ba8dp0, 0x1.804d30347b546p0, 0x1.8350cd30ac390p0,
0x1.865a7772164c5p0, 0x1.896a3b1f66a0ep0, 0x1.8c802477b0010p0,
0x1.8f9c3fd29beafp0, 0x1.92be99a09bf00p0, 0x1.95e73e6b1b75ep0,
0x1.99163ad4b1dccp0, 0x1.9c4b9b995509bp0, 0x1.9f876d8e8c566p0,
0x1.a2c9bda3a3e78p0, 0x1.a61298e1e069cp0, 0x1.a9620c6cb3374p0,
0x1.acb82581eee54p0, 0x1.b014f179fc3b8p0, 0x1.b3787dc80f95fp0,
0x1.b6e2d7fa5eb18p0, 0x1.ba540dba56e56p0, 0x1.bdcc2cccd3c85p0,
0x1.c14b431256446p0, 0x1.c4d15e873c193p0, 0x1.c85e8d43f7cd0p0,
0x1.cbf2dd7d490f2p0, 0x1.cf8e5d84758a9p0, 0x1.d3311bc7822b4p0,
0x1.d6db26d16cd67p0, 0x1.da8c8d4a66969p0, 0x1.de455df80e3c0p0,
0x1.e205a7bdab73ep0, 0x1.e5cd799c6a54ep0, 0x1.e99ce2b397649p0,
0x1.ed73f240dc142p0, 0x1.f152b7a07bb76p0, 0x1.f539424d90f5ep0,
0x1.f927a1e24bb76p0, 0x1.fd1de6182f8c9p0, 0x1.008e0f64294abp1,
0x1.02912df5ce72ap1, 0x1.049856cd84339p1, 0x1.06a39207f0a09p1,
0x1.08b2e7d2035cfp1, 0x1.0ac6606916501p1, 0x1.0cde041b0e9aep1,
0x1.0ef9db467dcf8p1, 0x1.1119ee5ac36b6p1, 0x1.133e45d82e952p1,
0x1.1566ea50201d7p1, 0x1.1793e4652cc50p1, 0x1.19c53ccb3fc6bp1,
0x1.1bfafc47bda73p1, 0x1.1e352bb1a74adp1, 0x1.2073d3f1bd518p1,
0x1.22b6fe02a3b9cp1, 0x1.24feb2f105cb8p1, 0x1.274afbdbba4a6p1,
0x1.299be1f3e7f1cp1, 0x1.2bf16e7d2a38cp1, 0x1.2e4baacdb6614p1,
0x1.30aaa04e80d05p1, 0x1.330e587b62b28p1, 0x1.3576dce33feadp1,
0x1.37e437282d4eep1, 0x1.3a5670ff972edp1, 0x1.3ccd9432682b4p1,
0x1.3f49aa9d30590p1, 0x1.41cabe304cb34p1, 0x1.4450d8f00edd4p1,
0x1.46dc04f4e5338p1, 0x1.496c4c6b832dap1, 0x1.4c01b9950a111p1,
0x1.4e9c56c731f5dp1, 0x1.513c2e6c731d7p1, 0x1.53e14b042f9cap1,
0x1.568bb722dd593p1, 0x1.593b7d72305bbp1,
};
INLINE_FMA
LLVM_LIBC_FUNCTION(float, expf, (float x)) {
using FPBits = typename fputil::FPBits<float>;
@ -157,8 +34,7 @@ LLVM_LIBC_FUNCTION(float, expf, (float x)) {
return x;
if (fputil::get_round() == FE_UPWARD)
return static_cast<float>(FPBits(FPBits::MIN_SUBNORMAL));
if (x != 0.0f)
errno = ERANGE;
errno = ERANGE;
return 0.0f;
}
// x >= 89 or nan

View File

@ -1,4 +1,4 @@
//===-- Implementation of expm1f function ---------------------------------===//
//===-- Single-precision e^x - 1 function ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -7,52 +7,131 @@
//===----------------------------------------------------------------------===//
#include "src/math/expm1f.h"
#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FMA.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/common.h"
#include "src/math/expf.h"
#include <errno.h>
namespace __llvm_libc {
// When |x| > Ln2, catastrophic cancellation does not occur with the
// subtraction expf(x) - 1.0f, so we use it to compute expm1f(x).
//
// We divide [-Ln2; Ln2] into 3 subintervals [-Ln2; -1/8], [-1/8; 1/8],
// [1/8; Ln2]. And we use a degree-6 polynomial to approximate exp(x) - 1 in
// each interval. The coefficients were generated by Sollya's fpminmax.
//
// See libc/utils/mathtools/expm1f.sollya for more detail.
INLINE_FMA
LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
const float ln2 =
0.69314718055994530941723212145817656807550013436025f; // For C++17:
// 0x1.62e'42ffp-1
float abs_x = __llvm_libc::fputil::abs(x);
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
if (abs_x <= ln2) {
if (abs_x <= 0.125f) {
return x * __llvm_libc::fputil::polyeval(
x, 1.0f, 0.5f, 0.16666664183139801025390625f,
4.1666664183139801025390625e-2f,
8.3379410207271575927734375e-3f,
1.3894210569560527801513671875e-3f);
}
if (x > 0.125f) {
return __llvm_libc::fputil::polyeval(
x, 1.23142086749794543720781803131103515625e-7f,
0.9999969005584716796875f, 0.500031292438507080078125f,
0.16650259494781494140625f, 4.21491153538227081298828125e-2f,
7.53940828144550323486328125e-3f,
2.05591344274580478668212890625e-3f);
}
return __llvm_libc::fputil::polyeval(
x, -6.899231408397099585272371768951416015625e-8f,
0.999998271465301513671875f, 0.4999825656414031982421875f,
0.16657467186450958251953125f, 4.1390590369701385498046875e-2f,
7.856394164264202117919921875e-3f,
9.380675037391483783721923828125e-4f);
// When x < log(2^-25) or nan
if (unlikely(xbits.uintval() >= 0xc18a'a123U)) {
// exp(-Inf) = 0
if (xbits.is_inf())
return -1.0f;
// exp(nan) = nan
if (xbits.is_nan())
return x;
int round_mode = fputil::get_round();
if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO)
return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f
return -1.0f;
}
return expf(x) - 1.0f;
// x >= 89 or nan
if (unlikely(!xbits.get_sign() && (xbits.uintval() >= 0x42b2'0000))) {
if (xbits.uintval() < 0x7f80'0000U) {
int rounding = fputil::get_round();
if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
return static_cast<float>(FPBits(FPBits::MAX_NORMAL));
errno = ERANGE;
}
return x + static_cast<float>(FPBits::inf());
}
int unbiased_exponent = static_cast<int>(xbits.get_unbiased_exponent());
// |x| < 2^-4
if (unbiased_exponent < 123) {
// |x| < 2^-25
if (unbiased_exponent < 102) {
// x = -0.0f
if (unlikely(xbits.uintval() == 0x8000'0000U))
return x;
// When |x| < 2^-25, the relative error:
// |(e^x - 1) - x| / |x| < |x^2| / |x| = |x| < 2^-25 < epsilon(1)/2.
// So the correctly rounded values of expm1(x) are:
// = x + eps(x) if rounding mode = FE_UPWARD,
// or (rounding mode = FE_TOWARDZERO and x is negative),
// = x otherwise.
// To simplify the rounding decision and make it more efficient, we use
// fma(x, x, x) ~ x + x^2 instead.
return fputil::fma(x, x, x);
}
// 2^-25 <= |x| < 2^-4
double xd = static_cast<double>(x);
double xsq = xd * xd;
// Degree-8 minimax polynomial generated by Sollya with:
// > display = hexadecimal;
// > P = fpminimax(expm1(x)/x, 7, [|D...|], [-2^-4, 2^-4]);
double r =
fputil::polyeval(xd, 0x1p-1, 0x1.55555555559abp-3, 0x1.55555555551a7p-5,
0x1.111110f70f2a4p-7, 0x1.6c16c17639e82p-10,
0x1.a02526febbea6p-13, 0x1.a01dc40888fcdp-16);
return static_cast<float>(fputil::fma(r, xsq, xd));
}
// For -18 < x < 89, to compute exp(x), we perform the following range
// reduction: find hi, mid, lo such that:
// x = hi + mid + lo, in which
// hi is an integer,
// mid * 2^7 is an integer
// -2^(-8) <= lo < 2^-8.
// In particular,
// hi + mid = round(x * 2^7) * 2^(-7).
// Then,
// exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo).
// We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
// respectively. exp(lo) is computed using a degree-7 minimax polynomial
// generated by Sollya.
// Exceptional value
if (xbits.uintval() == 0xbdc1'c6cbU) {
// x = -0x1.838d96p-4f
int round_mode = fputil::get_round();
if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
return -0x1.71c884p-4f;
return -0x1.71c882p-4f;
}
// x_hi = hi + mid.
int x_hi = static_cast<int>(x * 0x1.0p7f);
// Subtract (hi + mid) from x to get lo.
x -= static_cast<float>(x_hi) * 0x1.0p-7f;
double xd = static_cast<double>(x);
// Make sure that -2^(-8) <= lo < 2^-8.
if (x >= 0x1.0p-8f) {
++x_hi;
xd -= 0x1.0p-7;
}
if (x < -0x1.0p-8f) {
--x_hi;
xd += 0x1.0p-7;
}
x_hi += 104 << 7;
// hi = x_hi >> 7
double exp_hi = EXP_M1[x_hi >> 7];
// lo = x_hi & 0x0000'007fU;
double exp_mid = EXP_M2[x_hi & 0x7f];
double exp_hi_mid = exp_hi * exp_mid;
// Degree-7 minimax polynomial generated by Sollya with the following
// commands:
// > display = hexadecimal;
// > Q = fpminimax(expm1(x)/x, 6, [|D...|], [-2^-8, 2^-8]);
// > Q;
double exp_lo = fputil::polyeval(
xd, 0x1p0, 0x1p0, 0x1p-1, 0x1.5555555555555p-3, 0x1.55555555553ap-5,
0x1.1111111204dfcp-7, 0x1.6c16cb2da593ap-10, 0x1.9ff1648996d2ep-13);
return static_cast<float>(fputil::fma(exp_hi_mid, exp_lo, -1.0));
}
} // namespace __llvm_libc

View File

@ -83,15 +83,20 @@ add_fp_unittest(
add_fp_unittest(
expm1f_test
NO_RUN_POSTBUILD
NEED_MPFR
SUITE
libc_math_exhaustive_tests
SRCS
expm1f_test.cpp
expm1f_test.cpp
DEPENDS
.exhaustive_test
libc.include.math
libc.src.math.expf
libc.src.math.expm1f
libc.src.__support.FPUtil.fputil
LINK_OPTIONS
-lpthread
)
add_fp_unittest(

View File

@ -32,10 +32,12 @@ void LlvmLibcExhaustiveTest<T>::test_full_range(T start, T stop, int nthreads,
std::cout << msg.str();
msg.str("");
check(begin, end, rounding);
bool result;
check(begin, end, rounding, result);
msg << "** Finished testing from " << std::dec << begin << " to " << end
<< " [0x" << std::hex << begin << ", 0x" << end << ")" << std::endl;
<< " [0x" << std::hex << begin << ", 0x" << end
<< ") : " << (result ? "PASSED" : "FAILED") << std::endl;
std::cout << msg.str();
});
begin += increment;

View File

@ -22,5 +22,6 @@ struct LlvmLibcExhaustiveTest : public __llvm_libc::testing::Test {
void test_full_range(T start, T stop, int nthreads,
mpfr::RoundingMode rounding);
virtual void check(T start, T stop, mpfr::RoundingMode rounding) = 0;
virtual void check(T start, T stop, mpfr::RoundingMode rounding,
bool &result) = 0;
};

View File

@ -1,4 +1,4 @@
//===-- Exhaustive test for expm1f-----------------------------------------===//
//===-- Exhaustive test for expm1f ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -6,22 +6,76 @@
//
//===----------------------------------------------------------------------===//
#include "exhaustive_test.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/math/expm1f.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
#include <math.h>
#include "utils/UnitTest/FPMatcher.h"
using FPBits = __llvm_libc::fputil::FPBits<float>;
namespace mpfr = __llvm_libc::testing::mpfr;
TEST(LlvmLibcExpm1fExhaustiveTest, AllValues) {
uint32_t bits = 0;
do {
FPBits x(bits);
if (!x.is_inf_or_nan() && float(x) < 88.70f) {
ASSERT_MPFR_MATCH(mpfr::Operation::Expm1, float(x),
__llvm_libc::expm1f(float(x)), 1.5);
}
} while (bits++ < 0xffff'ffffU);
struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
void check(uint32_t start, uint32_t stop, mpfr::RoundingMode rounding,
bool &result) override {
mpfr::ForceRoundingMode r(rounding);
uint32_t bits = start;
result = false;
do {
FPBits xbits(bits);
float x = float(xbits);
EXPECT_MPFR_MATCH(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 0.5,
rounding);
} while (bits++ < stop);
result = true;
}
};
static constexpr int NUM_THREADS = 16;
// Range: [0, 89];
static constexpr uint32_t POS_START = 0x0000'0000U;
static constexpr uint32_t POS_STOP = 0x42b2'0000U;
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::Nearest);
}
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) {
test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::Downward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::TowardZero);
}
// Range: [-104, 0];
static constexpr uint32_t NEG_START = 0x8000'0000U;
static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::Nearest);
}
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::Downward);
}
TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::TowardZero);
}

View File

@ -54,15 +54,12 @@ TEST(LlvmLibcExpm1fTest, Overflow) {
TEST(LlvmLibcExpm1fTest, Underflow) {
errno = 0;
EXPECT_FP_EQ(-1.0f, __llvm_libc::expm1f(float(FPBits(0xff7fffffU))));
EXPECT_MATH_ERRNO(ERANGE);
float x = float(FPBits(0xc2cffff8U));
EXPECT_FP_EQ(-1.0f, __llvm_libc::expm1f(x));
EXPECT_MATH_ERRNO(ERANGE);
x = float(FPBits(0xc2d00008U));
EXPECT_FP_EQ(-1.0f, __llvm_libc::expm1f(x));
EXPECT_MATH_ERRNO(ERANGE);
}
// Test with inputs which are the borders of underflow/overflow but still
@ -72,19 +69,28 @@ TEST(LlvmLibcExpm1fTest, Borderline) {
errno = 0;
x = float(FPBits(0x42affff8U));
ASSERT_MPFR_MATCH(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 1.0);
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
x = float(FPBits(0x42b00008U));
ASSERT_MPFR_MATCH(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 1.0);
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
x = float(FPBits(0xc2affff8U));
ASSERT_MPFR_MATCH(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 1.0);
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
x = float(FPBits(0xc2b00008U));
ASSERT_MPFR_MATCH(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 1.0);
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
x = float(FPBits(0x3dc252ddU));
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
}
@ -104,6 +110,7 @@ TEST(LlvmLibcExpm1fTest, InFloatRange) {
// wider precision.
if (isnan(result) || isinf(result) || errno != 0)
continue;
ASSERT_MPFR_MATCH(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 2.2);
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
}
}