[libc] Add range reduction functions based on Paine and Hanek algorithm.

These functions will be used in a future patch to implement
trigonometric functions. Unit tests have been added but to the
libc-long-running-tests suite. The unit tests long running because we
compare against MPFR computations performed at 1280 bits of precision.

Some cleanups or elimination of repeated patterns can be done as follow
up changes.

Differential Revision: https://reviews.llvm.org/D104817
This commit is contained in:
Siva Chandra Reddy 2021-06-04 06:12:50 +00:00
parent 2c6ffb4eb2
commit ca6b354229
10 changed files with 665 additions and 1 deletions

View File

@ -26,6 +26,8 @@ add_header_library(
NormalFloat.h
PlatformDefs.h
PolyEval.h
UInt.h
XFloat.h
DEPENDS
libc.include.math
libc.include.errno

View File

@ -0,0 +1,236 @@
//===-- A class to manipulate wide integers. --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_UTILS_FPUTIL_UINT_H
#define LLVM_LIBC_UTILS_FPUTIL_UINT_H
#include <stddef.h> // For size_t
#include <stdint.h>
namespace __llvm_libc {
namespace fputil {
template <size_t Bits> class UInt {
// This is mainly used for debugging.
enum Kind {
NotANumber,
Valid,
};
static_assert(Bits > 0 && Bits % 64 == 0,
"Number of bits in UInt should be a multiple of 64.");
static constexpr uint64_t Mask32 = 0xFFFFFFFF;
static constexpr size_t WordCount = Bits / 64;
static constexpr uint64_t InvalidHexDigit = 20;
uint64_t val[WordCount];
Kind kind;
uint64_t low(uint64_t v) { return v & Mask32; }
uint64_t high(uint64_t v) { return (v >> 32) & Mask32; }
uint64_t hexval(char c) {
uint64_t diff;
if ((diff = uint64_t(c) - 'A') < 6)
return diff + 10;
else if ((diff = uint64_t(c) - 'a') < 6)
return diff + 10;
else if ((diff = uint64_t(c) - '0') < 10)
return diff;
else
return InvalidHexDigit;
}
size_t strlen(const char *s) {
size_t len;
for (len = 0; *s != '\0'; ++s, ++len)
;
return len;
}
public:
UInt() { kind = Valid; }
UInt(const UInt<Bits> &other) : kind(other.kind) {
if (kind == Valid) {
for (size_t i = 0; i < WordCount; ++i)
val[i] = other.val[i];
}
}
// This constructor is used for debugging.
explicit UInt(const char *s) {
size_t len = strlen(s);
if (len > Bits / 4 + 2 || len < 3) {
kind = NotANumber;
return;
}
if (!(s[0] == '0' && s[1] == 'x')) {
kind = NotANumber;
return;
}
for (size_t i = 0; i < WordCount; ++i)
val[i] = 0;
for (size_t i = len - 1, w = 0; i >= 2; --i, w += 4) {
uint64_t hex = hexval(s[i]);
if (hex == InvalidHexDigit) {
kind = NotANumber;
return;
}
val[w / 64] |= (hex << (w % 64));
}
kind = Valid;
}
explicit UInt(uint64_t v) {
val[0] = v;
for (size_t i = 1; i < WordCount; ++i)
val[i] = 0;
kind = Valid;
}
explicit UInt(uint64_t data[WordCount]) {
for (size_t i = 0; i < WordCount; ++i)
val[i] = data[i];
kind = Valid;
}
bool is_valid() const { return kind == Valid; }
// Add x to this number and store the result in this number.
// Returns the carry value produced by the addition operation.
uint64_t add(const UInt<Bits> &x) {
uint64_t carry = 0;
for (size_t i = 0; i < WordCount; ++i) {
uint64_t res_lo = low(val[i]) + low(x.val[i]) + carry;
carry = high(res_lo);
res_lo = low(res_lo);
uint64_t res_hi = high(val[i]) + high(x.val[i]) + carry;
carry = high(res_hi);
res_hi = low(res_hi);
val[i] = res_lo + (res_hi << 32);
}
return carry;
}
// Multiply this number with x and store the result in this number. It is
// implemented using the long multiplication algorithm by splitting the
// 64-bit words of this number and |x| in to 32-bit halves but peforming
// the operations using 64-bit numbers. This ensures that we don't lose the
// carry bits.
// Returns the carry value produced by the multiplication operation.
uint64_t mul(uint64_t x) {
uint64_t x_lo = low(x);
uint64_t x_hi = high(x);
uint64_t row1[WordCount + 1];
uint64_t carry = 0;
for (size_t i = 0; i < WordCount; ++i) {
uint64_t l = low(val[i]);
uint64_t h = high(val[i]);
uint64_t p1 = x_lo * l;
uint64_t p2 = x_lo * h;
uint64_t res_lo = low(p1) + carry;
carry = high(res_lo);
uint64_t res_hi = high(p1) + low(p2) + carry;
carry = high(res_hi) + high(p2);
res_lo = low(res_lo);
res_hi = low(res_hi);
row1[i] = res_lo + (res_hi << 32);
}
row1[WordCount] = carry;
uint64_t row2[WordCount + 1];
row2[0] = 0;
carry = 0;
for (size_t i = 0; i < WordCount; ++i) {
uint64_t l = low(val[i]);
uint64_t h = high(val[i]);
uint64_t p1 = x_hi * l;
uint64_t p2 = x_hi * h;
uint64_t res_lo = low(p1) + carry;
carry = high(res_lo);
uint64_t res_hi = high(p1) + low(p2) + carry;
carry = high(res_hi) + high(p2);
res_lo = low(res_lo);
res_hi = low(res_hi);
row2[i] = res_lo + (res_hi << 32);
}
row2[WordCount] = carry;
UInt<(WordCount + 1) * 64> r1(row1), r2(row2);
r2.shift_left(32);
r1.add(r2);
for (size_t i = 0; i < WordCount; ++i) {
val[i] = r1[i];
}
return r1[WordCount];
}
void shift_left(size_t s) {
const size_t drop = s / 64; // Number of words to drop
const size_t shift = s % 64; // Bits to shift in the remaining words.
const uint64_t mask = ((uint64_t(1) << shift) - 1) << (64 - shift);
for (size_t i = WordCount; drop > 0 && i > 0; --i) {
if (i - drop > 0)
val[i - 1] = val[i - drop - 1];
else
val[i - 1] = 0;
}
for (size_t i = WordCount; shift > 0 && i > drop; --i) {
uint64_t drop_val = (val[i - 1] & mask) >> (64 - shift);
val[i - 1] <<= shift;
if (i < WordCount)
val[i] |= drop_val;
}
}
void shift_right(size_t s) {
const size_t drop = s / 64; // Number of words to drop
const size_t shift = s % 64; // Bit shift in the remaining words.
const uint64_t mask = (uint64_t(1) << shift) - 1;
for (size_t i = 0; drop > 0 && i < WordCount; ++i) {
if (i + drop < WordCount)
val[i] = val[i + drop];
else
val[i] = 0;
}
for (size_t i = 0; shift > 0 && i < WordCount; ++i) {
uint64_t drop_val = ((val[i] & mask) << (64 - shift));
val[i] >>= shift;
if (i > 0)
val[i - 1] |= drop_val;
}
}
const uint64_t &operator[](size_t i) const { return val[i]; }
uint64_t &operator[](size_t i) { return val[i]; }
uint64_t *data() { return val; }
const uint64_t *data() const { return val; }
};
} // namespace fputil
} // namespace __llvm_libc
#endif // LLVM_LIBC_UTILS_FPUTIL_UINT_H

View File

@ -0,0 +1,180 @@
//===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "FPBits.h"
#include "NormalFloat.h"
#include "UInt.h"
#include <stdint.h>
namespace __llvm_libc {
namespace fputil {
// Store and manipulate positive double precision numbers at |Precision| bits.
template <size_t Precision> class XFloat {
static constexpr uint64_t OneMask = (uint64_t(1) << 63);
UInt<Precision> man;
static constexpr uint64_t WordCount = Precision / 64;
int exp;
size_t bit_width(uint64_t x) {
if (x == 0)
return 0;
size_t shift = 0;
while ((OneMask & x) == 0) {
++shift;
x <<= 1;
}
return 64 - shift;
}
public:
XFloat() : exp(0) {
for (int i = 0; i < WordCount; ++i)
man[i] = 0;
}
XFloat(const XFloat &other) : exp(other.exp) {
for (int i = 0; i < WordCount; ++i)
man[i] = other.man[i];
}
explicit XFloat(double x) {
auto xn = NormalFloat<double>(x);
exp = xn.exponent;
man[WordCount - 1] = xn.mantissa << 11;
for (int i = 0; i < WordCount - 1; ++i)
man[i] = 0;
}
XFloat(int e, const UInt<Precision> &bits) : exp(e) {
for (size_t i = 0; i < WordCount; ++i)
man[i] = bits[i];
}
// Multiply this number with x and store the result in this number.
void mul(double x) {
auto xn = NormalFloat<double>(x);
exp += xn.exponent;
uint64_t carry = man.mul(xn.mantissa << 11);
size_t carry_width = bit_width(carry);
carry_width = (carry_width == 64 ? 64 : 63);
man.shift_right(carry_width);
man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
exp += carry_width == 64 ? 1 : 0;
normalize();
}
void drop_int() {
if (exp < 0)
return;
if (exp > int(Precision - 1)) {
for (size_t i = 0; i < WordCount; ++i)
man[i] = 0;
return;
}
man.shift_left(exp + 1);
man.shift_right(exp + 1);
normalize();
}
double mul(const XFloat<Precision> &other) {
constexpr size_t row_words = 2 * WordCount + 1;
constexpr size_t row_precision = row_words * 64;
constexpr size_t result_bits = 2 * Precision;
UInt<row_precision> rows[WordCount];
for (size_t r = 0; r < WordCount; ++r) {
for (size_t i = 0; i < row_words; ++i) {
if (i < WordCount)
rows[r][i] = man[i];
else
rows[r][i] = 0;
}
rows[r].mul(other.man[r]);
rows[r].shift_left(r * 64);
}
for (size_t r = 1; r < WordCount; ++r) {
rows[0].add(rows[r]);
}
int result_exp = exp + other.exp;
uint64_t carry = rows[0][row_words - 1];
if (carry) {
size_t carry_width = bit_width(carry);
rows[0].shift_right(carry_width);
rows[0][row_words - 2] =
rows[0][row_words - 2] + (carry << (64 - carry_width));
result_exp += carry_width;
}
if (rows[0][row_words - 2] & OneMask) {
++result_exp;
} else {
rows[0].shift_left(1);
}
UInt<result_bits> result_man;
for (size_t i = 0; i < result_bits / 64; ++i)
result_man[i] = rows[0][i];
XFloat<result_bits> result(result_exp, result_man);
result.normalize();
return double(result);
}
explicit operator double() {
normalize();
constexpr uint64_t one = uint64_t(1) << 10;
constexpr uint64_t excess_mask = (one << 1) - 1;
uint64_t excess = man[WordCount - 1] & excess_mask;
uint64_t new_man = man[WordCount - 1] >> 11;
if (excess > one) {
// We have to round up.
++new_man;
} else if (excess == one) {
bool greater_than_one = false;
for (size_t i = 0; i < WordCount - 1; ++i) {
greater_than_one = (man[i] != 0);
if (greater_than_one)
break;
}
if (greater_than_one || (new_man & 1) != 0) {
++new_man;
}
}
if (new_man == (uint64_t(1) << 53))
++exp;
// We use NormalFloat as it can produce subnormal numbers or underflow to 0
// if necessary.
NormalFloat<double> d(exp, new_man, 0);
return double(d);
}
// Normalizes this number.
void normalize() {
uint64_t man_bits = 0;
for (size_t i = 0; i < WordCount; ++i)
man_bits |= man[i];
if (man_bits == 0)
return;
while ((man[WordCount - 1] & OneMask) == 0) {
man.shift_left(1);
--exp;
}
}
};
} // namespace fputil
} // namespace __llvm_libc

View File

@ -992,3 +992,15 @@ add_entrypoint_object(
COMPILE_OPTIONS
-O2
)
add_object_library(
dp_trig
SRCS
dp_trig.cpp
HDRS
dp_trig.h
DEPENDS
libc.src.__support.FPUtil.fputil
COMPILE_OPTIONS
-O3
)

View File

@ -0,0 +1,105 @@
//===-- Utilities for double precision trigonometric functions ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/FPUtil/UInt.h"
#include "src/__support/FPUtil/XFloat.h"
using FPBits = __llvm_libc::fputil::FPBits<double>;
namespace __llvm_libc {
// Implementation is based on the Payne and Hanek range reduction algorithm.
// The caller should ensure that x is positive.
// Consider:
// x/y = x * 1/y = I + F
// I is the integral part and F the fractional part of the result of the
// division operation. Then M = mod(x, y) = F * y. In order to compute M, we
// first compute F. We do it by dropping bits from 1/y which would only
// contribute integral results in the operation x * 1/y. This helps us get
// accurate values of F even when x is a very large number.
//
// Internal operations are performed at 192 bits of precision.
static double mod_impl(double x, const uint64_t y_bits[3],
const uint64_t inv_y_bits[20], int y_exponent,
int inv_y_exponent) {
FPBits bits(x);
int exponent = bits.getExponent();
int bit_drop = (exponent - 52) + inv_y_exponent + 1;
bit_drop = bit_drop >= 0 ? bit_drop : 0;
int word_drop = bit_drop / 64;
bit_drop %= 64;
fputil::UInt<256> man4;
for (size_t i = 0; i < 4; ++i) {
man4[3 - i] = inv_y_bits[word_drop + i];
}
man4.shift_left(bit_drop);
fputil::UInt<192> man_bits;
for (size_t i = 0; i < 3; ++i)
man_bits[i] = man4[i + 1];
fputil::XFloat<192> result(inv_y_exponent - word_drop * 64 - bit_drop,
man_bits);
result.mul(x);
result.drop_int(); // |result| now holds fractional part of x/y.
fputil::UInt<192> y_man;
for (size_t i = 0; i < 3; ++i)
y_man[i] = y_bits[2 - i];
fputil::XFloat<192> y_192(y_exponent, y_man);
return result.mul(y_192);
}
static constexpr int TwoPIExponent = 2;
// The mantissa bits of 2 * PI.
// The most signification bits are in the first uint64_t word
// and the least signification bits are in the last word. The
// first word includes the implicit '1' bit.
static constexpr uint64_t TwoPI[] = {0xc90fdaa22168c234, 0xc4c6628b80dc1cd1,
0x29024e088a67cc74};
static constexpr int InvTwoPIExponent = -3;
// The mantissa bits of 1/(2 * PI).
// The most signification bits are in the first uint64_t word
// and the least signification bits are in the last word. The
// first word includes the implicit '1' bit.
static constexpr uint64_t InvTwoPI[] = {
0xa2f9836e4e441529, 0xfc2757d1f534ddc0, 0xdb6295993c439041,
0xfe5163abdebbc561, 0xb7246e3a424dd2e0, 0x6492eea09d1921c,
0xfe1deb1cb129a73e, 0xe88235f52ebb4484, 0xe99c7026b45f7e41,
0x3991d639835339f4, 0x9c845f8bbdf9283b, 0x1ff897ffde05980f,
0xef2f118b5a0a6d1f, 0x6d367ecf27cb09b7, 0x4f463f669e5fea2d,
0x7527bac7ebe5f17b, 0x3d0739f78a5292ea, 0x6bfb5fb11f8d5d08,
0x56033046fc7b6bab, 0xf0cfbc209af4361e};
double mod_2pi(double x) {
static constexpr double _2pi = 6.283185307179586;
if (x < _2pi)
return x;
return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent, InvTwoPIExponent);
}
// Returns mod(x, pi/2)
double mod_pi_over_2(double x) {
static constexpr double pi_over_2 = 1.5707963267948966;
if (x < pi_over_2)
return x;
return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 2, InvTwoPIExponent + 2);
}
// Returns mod(x, pi/4)
double mod_pi_over_4(double x) {
static constexpr double pi_over_4 = 0.7853981633974483;
if (x < pi_over_4)
return x;
return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 3, InvTwoPIExponent + 3);
}
} // namespace __llvm_libc

View File

@ -0,0 +1,22 @@
//===-- Utilities for double precision trigonometric functions --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H
#define LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H
namespace __llvm_libc {
double mod_2pi(double);
double mod_pi_over_2(double);
double mod_pi_over_4(double);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H

View File

@ -1169,6 +1169,18 @@ add_fp_unittest(
libc.src.__support.FPUtil.fputil
)
add_fp_unittest(
mod_k_pi_test
NEED_MPFR
SUITE
libc-long-running-tests
SRCS
mod_k_pi_test.cpp
DEPENDS
libc.src.math.generic.dp_trig
libc.src.__support.FPUtil.fputil
)
add_subdirectory(generic)
add_subdirectory(exhaustive)
add_subdirectory(differential_testing)

View File

@ -0,0 +1,56 @@
//===-- Unittests mod_2pi, mod_pi_over_4 and mod_pi_over_2 ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/__support/FPUtil/TestHelpers.h"
#include "src/math/generic/dp_trig.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
#include "utils/UnitTest/Test.h"
#include <math.h>
namespace mpfr = __llvm_libc::testing::mpfr;
using FPBits = __llvm_libc::fputil::FPBits<double>;
using UIntType = FPBits::UIntType;
TEST(LlvmLibcMod2PITest, Range) {
constexpr UIntType count = 1000000000;
constexpr UIntType step = UIntType(-1) / count;
for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
double x = double(FPBits(v));
if (isnan(x) || isinf(x) || x <= 0.0)
continue;
ASSERT_MPFR_MATCH(mpfr::Operation::Mod2PI, x, __llvm_libc::mod_2pi(x), 0);
}
}
TEST(LlvmLibcModPIOver2Test, Range) {
constexpr UIntType count = 1000000000;
constexpr UIntType step = UIntType(-1) / count;
for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
double x = double(FPBits(v));
if (isnan(x) || isinf(x) || x <= 0.0)
continue;
ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver2, x,
__llvm_libc::mod_pi_over_2(x), 0);
}
}
TEST(LlvmLibcModPIOver4Test, Range) {
constexpr UIntType count = 1000000000;
constexpr UIntType step = UIntType(-1) / count;
for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
double x = double(FPBits(v));
if (isnan(x) || isinf(x) || x <= 0.0)
continue;
ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver4, x,
__llvm_libc::mod_pi_over_4(x), 0);
}
}

View File

@ -62,7 +62,7 @@ class MPFRNumber {
mpfr_t value;
public:
MPFRNumber() : mpfrPrecision(128) { mpfr_init2(value, mpfrPrecision); }
MPFRNumber() : mpfrPrecision(256) { mpfr_init2(value, mpfrPrecision); }
// We use explicit EnableIf specializations to disallow implicit
// conversions. Implicit conversions can potentially lead to loss of
@ -202,6 +202,33 @@ public:
return result;
}
MPFRNumber mod_2pi() const {
MPFRNumber result(0.0, 1280);
MPFRNumber _2pi(0.0, 1280);
mpfr_const_pi(_2pi.value, MPFR_RNDN);
mpfr_mul_si(_2pi.value, _2pi.value, 2, MPFR_RNDN);
mpfr_fmod(result.value, value, _2pi.value, MPFR_RNDN);
return result;
}
MPFRNumber mod_pi_over_2() const {
MPFRNumber result(0.0, 1280);
MPFRNumber pi_over_2(0.0, 1280);
mpfr_const_pi(pi_over_2.value, MPFR_RNDN);
mpfr_mul_d(pi_over_2.value, pi_over_2.value, 0.5, MPFR_RNDN);
mpfr_fmod(result.value, value, pi_over_2.value, MPFR_RNDN);
return result;
}
MPFRNumber mod_pi_over_4() const {
MPFRNumber result(0.0, 1280);
MPFRNumber pi_over_4(0.0, 1280);
mpfr_const_pi(pi_over_4.value, MPFR_RNDN);
mpfr_mul_d(pi_over_4.value, pi_over_4.value, 0.25, MPFR_RNDN);
mpfr_fmod(result.value, value, pi_over_4.value, MPFR_RNDN);
return result;
}
MPFRNumber sin() const {
MPFRNumber result;
mpfr_sin(result.value, value, MPFR_RNDN);
@ -281,6 +308,9 @@ public:
template <typename T>
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, double> ulp(T input) {
T thisAsT = as<T>();
if (thisAsT == input)
return T(0.0);
int thisExponent = fputil::FPBits<T>(thisAsT).getExponent();
int inputExponent = fputil::FPBits<T>(input).getExponent();
if (thisAsT * input < 0 || thisExponent == inputExponent) {
@ -339,6 +369,12 @@ unaryOperation(Operation op, InputType input) {
return mpfrInput.expm1();
case Operation::Floor:
return mpfrInput.floor();
case Operation::Mod2PI:
return mpfrInput.mod_2pi();
case Operation::ModPIOver2:
return mpfrInput.mod_pi_over_2();
case Operation::ModPIOver4:
return mpfrInput.mod_pi_over_4();
case Operation::Round:
return mpfrInput.round();
case Operation::Sin:

View File

@ -30,6 +30,9 @@ enum class Operation : int {
Exp2,
Expm1,
Floor,
Mod2PI,
ModPIOver2,
ModPIOver4,
Round,
Sin,
Sqrt,