forked from OSchip/llvm-project
331 lines
10 KiB
C
331 lines
10 KiB
C
|
//===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
|
||
|
#define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
|
||
|
|
||
|
// This is a helper class for use in floating-point conversions
|
||
|
// between binary decimal representations. It holds a multiple-precision
|
||
|
// integer value using digits of a radix that is a large even power of ten
|
||
|
// (10,000,000,000,000,000 by default, 10**16). These digits are accompanied
|
||
|
// by a signed exponent that denotes multiplication by a power of ten.
|
||
|
// The effective radix point is to the right of the digits (i.e., they do
|
||
|
// not represent a fraction).
|
||
|
//
|
||
|
// The operations supported by this class are limited to those required
|
||
|
// for conversions between binary and decimal representations; it is not
|
||
|
// a general-purpose facility.
|
||
|
|
||
|
#include "flang/Common/bit-population-count.h"
|
||
|
#include "flang/Common/leading-zero-bit-count.h"
|
||
|
#include "flang/Common/uint128.h"
|
||
|
#include "flang/Common/unsigned-const-division.h"
|
||
|
#include "flang/Decimal/binary-floating-point.h"
|
||
|
#include "flang/Decimal/decimal.h"
|
||
|
#include <cinttypes>
|
||
|
#include <limits>
|
||
|
#include <type_traits>
|
||
|
|
||
|
namespace Fortran::decimal {
|
||
|
|
||
|
static constexpr std::uint64_t TenToThe(int power) {
|
||
|
return power <= 0 ? 1 : 10 * TenToThe(power - 1);
|
||
|
}
|
||
|
|
||
|
// 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be
|
||
|
// even, so that pairs of decimal digits do not straddle Digits.
|
||
|
// So LOG10RADIX must be 16 or 6.
|
||
|
template<int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
|
||
|
public:
|
||
|
using Real = BinaryFloatingPointNumber<PREC>;
|
||
|
static constexpr int log10Radix{LOG10RADIX};
|
||
|
|
||
|
private:
|
||
|
static constexpr std::uint64_t uint64Radix{TenToThe(log10Radix)};
|
||
|
static constexpr int minDigitBits{
|
||
|
64 - common::LeadingZeroBitCount(uint64Radix)};
|
||
|
using Digit = common::HostUnsignedIntType<minDigitBits>;
|
||
|
static constexpr Digit radix{uint64Radix};
|
||
|
static_assert(radix < std::numeric_limits<Digit>::max() / 1000,
|
||
|
"radix is somehow too big");
|
||
|
static_assert(radix > std::numeric_limits<Digit>::max() / 10000,
|
||
|
"radix is somehow too small");
|
||
|
|
||
|
// The base-2 logarithm of the least significant bit that can arise
|
||
|
// in a subnormal IEEE floating-point number.
|
||
|
static constexpr int minLog2AnyBit{
|
||
|
-Real::exponentBias - Real::binaryPrecision};
|
||
|
|
||
|
// The number of Digits needed to represent the smallest subnormal.
|
||
|
static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
|
||
|
|
||
|
public:
|
||
|
explicit BigRadixFloatingPointNumber(
|
||
|
enum FortranRounding rounding = RoundDefault)
|
||
|
: rounding_{rounding} {}
|
||
|
|
||
|
// Converts a binary floating point value.
|
||
|
explicit BigRadixFloatingPointNumber(
|
||
|
Real, enum FortranRounding = RoundDefault);
|
||
|
|
||
|
BigRadixFloatingPointNumber &SetToZero() {
|
||
|
isNegative_ = false;
|
||
|
digits_ = 0;
|
||
|
exponent_ = 0;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
// Converts decimal floating-point to binary.
|
||
|
ConversionToBinaryResult<PREC> ConvertToBinary();
|
||
|
|
||
|
// Parses and converts to binary. Handles leading spaces,
|
||
|
// "NaN", & optionally-signed "Inf". Does not skip internal
|
||
|
// spaces.
|
||
|
// The argument is a reference to a pointer that is left
|
||
|
// pointing to the first character that wasn't parsed.
|
||
|
ConversionToBinaryResult<PREC> ConvertToBinary(const char *&);
|
||
|
|
||
|
// Formats a decimal floating-point number to a user buffer.
|
||
|
// May emit "NaN" or "Inf", or an possibly-signed integer.
|
||
|
// No decimal point is written, but if it were, it would be
|
||
|
// after the last digit; the effective decimal exponent is
|
||
|
// returned as part of the result structure so that it can be
|
||
|
// formatted by the client.
|
||
|
ConversionToDecimalResult ConvertToDecimal(
|
||
|
char *, std::size_t, enum DecimalConversionFlags, int digits) const;
|
||
|
|
||
|
// Discard decimal digits not needed to distinguish this value
|
||
|
// from the decimal encodings of two others (viz., the nearest binary
|
||
|
// floating-point numbers immediately below and above this one).
|
||
|
// The last decimal digit may not be uniquely determined in all
|
||
|
// cases, and will be the mean value when that is so (e.g., if
|
||
|
// last decimal digit values 6-8 would all work, it'll be a 7).
|
||
|
// This minimization necessarily assumes that the value will be
|
||
|
// emitted and read back into the same (or less precise) format
|
||
|
// with default rounding to the nearest value.
|
||
|
void Minimize(
|
||
|
BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
|
||
|
|
||
|
private:
|
||
|
BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that)
|
||
|
: digits_{that.digits_}, exponent_{that.exponent_},
|
||
|
isNegative_{that.isNegative_}, rounding_{that.rounding_} {
|
||
|
for (int j{0}; j < digits_; ++j) {
|
||
|
digit_[j] = that.digit_[j];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool IsZero() const {
|
||
|
// Don't assume normalization.
|
||
|
for (int j{0}; j < digits_; ++j) {
|
||
|
if (digit_[j] != 0) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// Predicate: true when 10*value would cause a carry.
|
||
|
// (When this happens during decimal-to-binary conversion,
|
||
|
// there are more digits in the input string than can be
|
||
|
// represented precisely.)
|
||
|
bool IsFull() const {
|
||
|
return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10;
|
||
|
}
|
||
|
|
||
|
// Sets *this to an unsigned integer value.
|
||
|
// Returns any remainder.
|
||
|
template<typename UINT> UINT SetTo(UINT n) {
|
||
|
static_assert(
|
||
|
std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
|
||
|
SetToZero();
|
||
|
while (n != 0) {
|
||
|
auto q{common::DivideUnsignedBy<UINT, 10>(n)};
|
||
|
if (n != q * 10) {
|
||
|
break;
|
||
|
}
|
||
|
++exponent_;
|
||
|
n = q;
|
||
|
}
|
||
|
if constexpr (sizeof n < sizeof(Digit)) {
|
||
|
if (n != 0) {
|
||
|
digit_[digits_++] = n;
|
||
|
}
|
||
|
return 0;
|
||
|
} else {
|
||
|
while (n != 0 && digits_ < digitLimit_) {
|
||
|
auto q{common::DivideUnsignedBy<UINT, radix>(n)};
|
||
|
digit_[digits_++] = static_cast<Digit>(n - q * radix);
|
||
|
n = q;
|
||
|
}
|
||
|
return n;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int RemoveLeastOrderZeroDigits() {
|
||
|
int remove{0};
|
||
|
if (digits_ > 0 && digit_[0] == 0) {
|
||
|
while (remove < digits_ && digit_[remove] == 0) {
|
||
|
++remove;
|
||
|
}
|
||
|
if (remove >= digits_) {
|
||
|
digits_ = 0;
|
||
|
} else if (remove > 0) {
|
||
|
for (int j{0}; j + remove < digits_; ++j) {
|
||
|
digit_[j] = digit_[j + remove];
|
||
|
}
|
||
|
digits_ -= remove;
|
||
|
}
|
||
|
}
|
||
|
return remove;
|
||
|
}
|
||
|
|
||
|
void RemoveLeadingZeroDigits() {
|
||
|
while (digits_ > 0 && digit_[digits_ - 1] == 0) {
|
||
|
--digits_;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Normalize() {
|
||
|
RemoveLeadingZeroDigits();
|
||
|
exponent_ += RemoveLeastOrderZeroDigits() * log10Radix;
|
||
|
}
|
||
|
|
||
|
// This limited divisibility test only works for even divisors of the radix,
|
||
|
// which is fine since it's only ever used with 2 and 5.
|
||
|
template<int N> bool IsDivisibleBy() const {
|
||
|
static_assert(N > 1 && radix % N == 0, "bad modulus");
|
||
|
return digits_ == 0 || (digit_[0] % N) == 0;
|
||
|
}
|
||
|
|
||
|
template<unsigned DIVISOR> int DivideBy() {
|
||
|
Digit remainder{0};
|
||
|
for (int j{digits_ - 1}; j >= 0; --j) {
|
||
|
Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
|
||
|
Digit nrem{digit_[j] - DIVISOR * q};
|
||
|
digit_[j] = q + (radix / DIVISOR) * remainder;
|
||
|
remainder = nrem;
|
||
|
}
|
||
|
return remainder;
|
||
|
}
|
||
|
|
||
|
int DivideByPowerOfTwo(int twoPow) { // twoPow <= LOG10RADIX
|
||
|
int remainder{0};
|
||
|
for (int j{digits_ - 1}; j >= 0; --j) {
|
||
|
Digit q{digit_[j] >> twoPow};
|
||
|
int nrem = digit_[j] - (q << twoPow);
|
||
|
digit_[j] = q + (radix >> twoPow) * remainder;
|
||
|
remainder = nrem;
|
||
|
}
|
||
|
return remainder;
|
||
|
}
|
||
|
|
||
|
int AddCarry(int position = 0, int carry = 1) {
|
||
|
for (; position < digits_; ++position) {
|
||
|
Digit v{digit_[position] + carry};
|
||
|
if (v < radix) {
|
||
|
digit_[position] = v;
|
||
|
return 0;
|
||
|
}
|
||
|
digit_[position] = v - radix;
|
||
|
carry = 1;
|
||
|
}
|
||
|
if (digits_ < digitLimit_) {
|
||
|
digit_[digits_++] = carry;
|
||
|
return 0;
|
||
|
}
|
||
|
Normalize();
|
||
|
if (digits_ < digitLimit_) {
|
||
|
digit_[digits_++] = carry;
|
||
|
return 0;
|
||
|
}
|
||
|
return carry;
|
||
|
}
|
||
|
|
||
|
void Decrement() {
|
||
|
for (int j{0}; digit_[j]-- == 0; ++j) {
|
||
|
digit_[j] = radix - 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
template<int N> int MultiplyByHelper(int carry = 0) {
|
||
|
for (int j{0}; j < digits_; ++j) {
|
||
|
auto v{N * digit_[j] + carry};
|
||
|
carry = common::DivideUnsignedBy<Digit, radix>(v);
|
||
|
digit_[j] = v - carry * radix; // i.e., v % radix
|
||
|
}
|
||
|
return carry;
|
||
|
}
|
||
|
|
||
|
template<int N> int MultiplyBy(int carry = 0) {
|
||
|
if (int newCarry{MultiplyByHelper<N>(carry)}) {
|
||
|
return AddCarry(digits_, newCarry);
|
||
|
} else {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
template<int N> int MultiplyWithoutNormalization() {
|
||
|
if (int carry{MultiplyByHelper<N>(0)}) {
|
||
|
if (digits_ < digitLimit_) {
|
||
|
digit_[digits_++] = carry;
|
||
|
return 0;
|
||
|
} else {
|
||
|
return carry;
|
||
|
}
|
||
|
} else {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
template<int N> void MultiplyByRounded() {
|
||
|
if (int carry{MultiplyBy<N>()}) {
|
||
|
LoseLeastSignificantDigit();
|
||
|
digit_[digits_ - 1] += carry;
|
||
|
exponent_ += log10Radix;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void LoseLeastSignificantDigit(); // with rounding
|
||
|
|
||
|
void PushCarry(int carry) {
|
||
|
if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) {
|
||
|
LoseLeastSignificantDigit();
|
||
|
digit_[digits_ - 1] += carry;
|
||
|
} else {
|
||
|
digit_[digits_++] = carry;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Adds another number and then divides by two.
|
||
|
// Assumes same exponent and sign.
|
||
|
// Returns true when the the result has effectively been rounded down.
|
||
|
bool Mean(const BigRadixFloatingPointNumber &);
|
||
|
|
||
|
bool ParseNumber(const char *&, bool &inexact);
|
||
|
|
||
|
using Raw = typename Real::RawType;
|
||
|
constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); }
|
||
|
constexpr Raw Infinity() const {
|
||
|
return (Raw{Real::maxExponent} << Real::significandBits) | SignBit();
|
||
|
}
|
||
|
static constexpr Raw NaN() {
|
||
|
return (Raw{Real::maxExponent} << Real::significandBits) |
|
||
|
(Raw{1} << (Real::significandBits - 2));
|
||
|
}
|
||
|
|
||
|
Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
|
||
|
int digits_{0}; // # of elements in digit_[] array; zero when zero
|
||
|
int digitLimit_{maxDigits}; // precision clamp
|
||
|
int exponent_{0}; // signed power of ten
|
||
|
bool isNegative_{false};
|
||
|
enum FortranRounding rounding_ { RoundDefault };
|
||
|
};
|
||
|
}
|
||
|
#endif
|