forked from OSchip/llvm-project
[flang] Improve comments, clean up a couple of things
Original-commit: flang-compiler/f18@8d94d643b6 Reviewed-on: https://github.com/flang-compiler/f18/pull/671 Tree-same-pre-rewrite: false
This commit is contained in:
parent
1502542fe7
commit
b4f34801c6
|
@ -16,10 +16,12 @@
|
|||
#define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
|
||||
|
||||
// This is a helper class for use in floating-point conversions
|
||||
// to and from decimal representations. It holds a multiple-precision
|
||||
// integer value using digits of a radix that is a large even power of ten.
|
||||
// The digits are accompanied by a signed exponent that denotes multiplication
|
||||
// by a power of ten.
|
||||
// between binary and decimal representations.  It holds a multiple-precision
|
||||
// integer value using digits of a radix that is a large even power of ten
|
||||
// (10,000,000,000,000,000 by default, 10**16). These digits are accompanied
|
||||
// by a signed exponent that denotes multiplication by a power of ten.
|
||||
// The effective radix point is to the right of the digits (i.e., they do
|
||||
// not represent a fraction).
|
||||
//
|
||||
// The operations supported by this class are limited to those required
|
||||
// for conversions between binary and decimal representations; it is not
|
||||
|
@ -63,6 +65,8 @@ private:
|
|||
// in a subnormal IEEE floating-point number.
|
||||
static constexpr int minLog2AnyBit{
|
||||
-int{Real::exponentBias} - Real::precision};
|
||||
|
||||
// The number of Digits needed to represent the smallest subnormal.
|
||||
static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
|
||||
|
||||
public:
|
||||
|
@ -84,18 +88,31 @@ public:
|
|||
// Converts decimal floating-point to binary.
|
||||
ConversionToBinaryResult<PREC> ConvertToBinary();
|
||||
|
||||
// Parses and converts to binary. Also handles "NaN" & "Inf".
|
||||
// The reference argument is a pointer that is left pointing to
|
||||
// the first character that wasn't included.
|
||||
// Parses and converts to binary. Handles leading spaces,
|
||||
// "NaN", & optionally-signed "Inf". Does not skip internal
|
||||
// spaces.
|
||||
// The argument is a reference to a pointer that is left
|
||||
// pointing to the first character that wasn't parsed.
|
||||
ConversionToBinaryResult<PREC> ConvertToBinary(const char *&);
|
||||
|
||||
// Formats a decimal floating-point number.
|
||||
// Formats a decimal floating-point number to a user buffer.
|
||||
// May emit "NaN" or "Inf", or a possibly-signed integer.
|
||||
// No decimal point is written, but if it were, it would be
|
||||
// after the last digit; the effective decimal exponent is
|
||||
// returned as part of the result structure so that it can be
|
||||
// formatted by the client.
|
||||
ConversionToDecimalResult ConvertToDecimal(
|
||||
char *, std::size_t, enum DecimalConversionFlags, int digits) const;
|
||||
|
||||
// Discard decimal digits not needed to distinguish this value
|
||||
// from the decimal encodings of two others (viz., the nearest binary
|
||||
// floating-point numbers immediately below and above this one).
|
||||
// The last decimal digit may not be uniquely determined in all
|
||||
// cases, and will be the mean value when that is so (e.g., if
|
||||
// last decimal digit values 6-8 would all work, it'll be a 7).
|
||||
// This minimization necessarily assumes that the value will be
|
||||
// emitted and read back into the same (or less precise) format
|
||||
// with default rounding to the nearest value.
|
||||
void Minimize(
|
||||
BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
|
||||
|
||||
|
@ -109,6 +126,7 @@ private:
|
|||
}
|
||||
|
||||
bool IsZero() const {
|
||||
// Don't assume normalization.
|
||||
for (int j{0}; j < digits_; ++j) {
|
||||
if (digit_[j] != 0) {
|
||||
return false;
|
||||
|
@ -117,8 +135,6 @@ private:
|
|||
return true;
|
||||
}
|
||||
|
||||
bool IsOdd() const { return digits_ > 0 && (digit_[0] & 1); }
|
||||
|
||||
// Predicate: true when 10*value would cause a carry.
|
||||
// (When this happens during decimal-to-binary conversion,
|
||||
// there are more digits in the input string than can be
|
||||
|
@ -128,7 +144,7 @@ private:
|
|||
}
|
||||
|
||||
// Set to an unsigned integer value.
|
||||
// Returns any remainder (usually zero).
|
||||
// Returns any remainder.
|
||||
template<typename UINT> UINT SetTo(UINT n) {
|
||||
static_assert(
|
||||
std::is_same_v<UINT, __uint128_t> || std::is_unsigned_v<UINT>);
|
||||
|
@ -186,7 +202,7 @@ private:
|
|||
}
|
||||
|
||||
// This limited divisibility test only works for divisors that evenly divide the radix,
|
||||
// which is fine since it's only used with 2 and 5.
|
||||
// which is fine since it's only ever used with 2 and 5.
|
||||
template<int N> bool IsDivisibleBy() const {
|
||||
static_assert(N > 1 && radix % N == 0, "bad modulus");
|
||||
return digits_ == 0 || (digit_[0] % N) == 0;
|
||||
|
@ -195,7 +211,6 @@ private:
|
|||
template<unsigned DIVISOR> int DivideBy() {
|
||||
Digit remainder{0};
|
||||
for (int j{digits_ - 1}; j >= 0; --j) {
|
||||
// N.B. Because DIVISOR is a constant, these operations should be cheap.
|
||||
Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
|
||||
Digit nrem{digit_[j] - DIVISOR * q};
|
||||
digit_[j] = q + (radix / DIVISOR) * remainder;
|
||||
|
@ -273,31 +288,6 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
void LoseLeastSignificantDigit() {
|
||||
if (digits_ >= 2) {
|
||||
Digit LSD{digit_[0]};
|
||||
for (int j{0}; j < digits_ - 1; ++j) {
|
||||
digit_[j] = digit_[j + 1];
|
||||
}
|
||||
digit_[digits_ - 1] = 0;
|
||||
exponent_ += log10Radix;
|
||||
bool incr{false};
|
||||
switch (rounding_) {
|
||||
case RoundNearest:
|
||||
case RoundDefault:
|
||||
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
|
||||
break;
|
||||
case RoundUp: incr = LSD > 0 && !isNegative_; break;
|
||||
case RoundDown: incr = LSD > 0 && isNegative_; break;
|
||||
case RoundToZero: break;
|
||||
case RoundCompatible: incr = LSD >= radix / 2; break;
|
||||
}
|
||||
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
|
||||
digit_[j] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N> void MultiplyByRounded() {
|
||||
if (int carry{MultiplyBy<N>()}) {
|
||||
LoseLeastSignificantDigit();
|
||||
|
@ -305,6 +295,8 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
void LoseLeastSignificantDigit(); // with rounding
|
||||
|
||||
// Adds another number and then divides by two.
|
||||
// Assumes same exponent and sign.
|
||||
// Returns true when the result has effectively been rounded down.
|
||||
|
|
|
@ -15,7 +15,8 @@
|
|||
#ifndef FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
|
||||
#define FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
|
||||
|
||||
// Access the fields of an IEEE-754 binary floating-point value.
|
||||
// Access and manipulate the fields of an IEEE-754 binary
|
||||
// floating-point value via a generalized template.
|
||||
|
||||
#include <cinttypes>
|
||||
#include <climits>
|
||||
|
@ -33,7 +34,6 @@ template<int BITS> struct HostUnsignedIntTypeHelper {
|
|||
template<int BITS>
|
||||
using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type;
|
||||
|
||||
namespace {
|
||||
static constexpr int BitsForPrecision(int prec) {
|
||||
switch (prec) {
|
||||
case 8: return 16;
|
||||
|
@ -45,7 +45,6 @@ static constexpr int BitsForPrecision(int prec) {
|
|||
default: return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int PRECISION> struct BinaryFloatingPointNumber {
|
||||
static constexpr int precision{PRECISION};
|
||||
|
|
|
@ -294,6 +294,33 @@ void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Minimize(
|
|||
Normalize();
|
||||
}
|
||||
|
||||
// Discards digit_[0] (the least significant big-radix digit), moves the
// remaining digits down by one position, adds log10Radix to the decimal
// exponent to compensate, and then rounds what is left according to
// rounding_.  Does nothing when fewer than two digits are in use.
template<int PREC, int LOG10RADIX>
|
||||
void BigRadixFloatingPointNumber<PREC,
|
||||
LOG10RADIX>::LoseLeastSignificantDigit() {
|
||||
if (digits_ >= 2) {
|
||||
Digit LSD{digit_[0]};  // the digit being dropped; it drives the rounding below
|
||||
for (int j{0}; j < digits_ - 1; ++j) {
|
||||
digit_[j] = digit_[j + 1];  // shift every higher digit down one slot
|
||||
}
|
||||
digit_[digits_ - 1] = 0;  // vacated top slot; digits_ itself is not changed here
|
||||
exponent_ += log10Radix;  // one big digit == log10Radix decimal digits
|
||||
bool incr{false};  // whether the remaining magnitude must be bumped by one
|
||||
switch (rounding_) {
|
||||
case RoundNearest:
|
||||
case RoundDefault:
|
||||
// Bump when more than half a unit was dropped, or on an exact half when
// the new low digit is odd (so that ties land on an even digit).
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
|
||||
break;
|
||||
case RoundUp: incr = LSD > 0 && !isNegative_; break;  // any nonzero loss, non-negative values only
|
||||
case RoundDown: incr = LSD > 0 && isNegative_; break;  // any nonzero loss, negative values only
|
||||
case RoundToZero: break;  // plain truncation
|
||||
case RoundCompatible: incr = LSD >= radix / 2; break;  // an exact half also bumps the magnitude
|
||||
}
|
||||
// Add the increment to the low digit and ripple a carry upward for as long
// as a digit reaches radix.  When incr is false the first test fails at once
// (assumes stored digits are always < radix -- TODO confirm invariant).
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
|
||||
digit_[j] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int PREC>
|
||||
ConversionToDecimalResult ConvertToDecimal(char *buffer, size_t size,
|
||||
enum DecimalConversionFlags flags, int digits,
|
||||
|
|
|
@ -50,35 +50,37 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
|
|||
if (q == start || (q == start + 1 && *start == '.')) {
|
||||
return false; // require at least one digit
|
||||
}
|
||||
const char *d{q};
|
||||
// There's a valid number here; set the reference argument to point to
|
||||
// the first character afterward.
|
||||
p = q;
|
||||
// Strip off trailing zeroes
|
||||
if (point != nullptr) {
|
||||
while (d > firstDigit && d[-1] == '0') {
|
||||
--d;
|
||||
while (q > firstDigit && q[-1] == '0') {
|
||||
--q;
|
||||
}
|
||||
if (d[-1] == '.') {
|
||||
if (q[-1] == '.') {
|
||||
point = nullptr;
|
||||
--d;
|
||||
--q;
|
||||
}
|
||||
}
|
||||
if (point == nullptr) {
|
||||
while (d > firstDigit && d[-1] == '0') {
|
||||
--d;
|
||||
while (q > firstDigit && q[-1] == '0') {
|
||||
--q;
|
||||
++exponent_;
|
||||
}
|
||||
}
|
||||
if (d == firstDigit) {
|
||||
if (q == firstDigit) {
|
||||
exponent_ = 0; // all zeros
|
||||
}
|
||||
if (point != nullptr) {
|
||||
exponent_ -= static_cast<int>(d - point - 1);
|
||||
exponent_ -= static_cast<int>(q - point - 1);
|
||||
}
|
||||
// Trim any excess digits
|
||||
const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
|
||||
if (d > limit) {
|
||||
if (q > limit) {
|
||||
inexact = true;
|
||||
while (d-- > limit) {
|
||||
if (*d == '.') {
|
||||
while (q-- > limit) {
|
||||
if (*q == '.') {
|
||||
point = nullptr;
|
||||
--limit;
|
||||
} else if (point == nullptr) {
|
||||
|
@ -87,18 +89,19 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
|
|||
}
|
||||
}
|
||||
// Rack the decimal digits up into big Digits.
|
||||
for (auto times{radix}; d-- > firstDigit;) {
|
||||
if (*d != '.') {
|
||||
for (auto times{radix}; q-- > firstDigit;) {
|
||||
if (*q != '.') {
|
||||
if (times == radix) {
|
||||
digit_[digits_++] = *d - '0';
|
||||
digit_[digits_++] = *q - '0';
|
||||
times = 10;
|
||||
} else {
|
||||
digit_[digits_ - 1] += times * (*d - '0');
|
||||
digit_[digits_ - 1] += times * (*q - '0');
|
||||
times *= 10;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Look for an optional exponent field.
|
||||
q = p;
|
||||
switch (*q) {
|
||||
case 'e':
|
||||
case 'E':
|
||||
|
@ -115,6 +118,7 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
|
|||
for (int j{0}; j < 8 && *q >= '0' && *q <= '9'; ++j) {
|
||||
expo = 10 * expo + *q++ - '0';
|
||||
}
|
||||
p = q; // exponent was valid
|
||||
if (negExpo) {
|
||||
exponent_ -= expo;
|
||||
} else {
|
||||
|
@ -124,7 +128,6 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
|
|||
} break;
|
||||
default: break;
|
||||
}
|
||||
p = q;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -271,28 +274,21 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
|
|||
}
|
||||
}
|
||||
// The value is not zero.
|
||||
// x = D. * 10.**E
|
||||
IntermediateFloat<PREC> f;
|
||||
#if 0 // actually a small loss
|
||||
// Make the value odd.
|
||||
if (int trailing0s{common::TrailingZeroBitCount(digit_[0])}) {
|
||||
f.AdjustExponent(trailing0s);
|
||||
for (; trailing0s > log10Radix; trailing0s -= log10Radix) {
|
||||
DivideByPowerOfTwo(log10Radix);
|
||||
}
|
||||
DivideByPowerOfTwo(trailing0s);
|
||||
}
|
||||
#endif
|
||||
// Shift our perspective on the radix (& decimal) point so that
|
||||
// it sits to the *left* of the digits.
|
||||
// it sits to the *left* of the digits: i.e., x = .D * 10.**E
|
||||
exponent_ += digits_ * log10Radix;
|
||||
// Apply any negative decimal exponent by multiplication
|
||||
// by a power of two, adjusting the binary exponent to compensate.
|
||||
while (exponent_ < log10Radix) {
|
||||
// x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
|
||||
f.AdjustExponent(-9);
|
||||
digitLimit_ = digits_;
|
||||
int carry{MultiplyWithoutNormalization<512>()};
|
||||
RemoveLeastOrderZeroDigits();
|
||||
if (carry != 0) {
|
||||
// x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
|
||||
digit_[digits_++] = carry;
|
||||
exponent_ += log10Radix;
|
||||
}
|
||||
|
@ -304,16 +300,19 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
|
|||
digitLimit_ = digits_;
|
||||
int carry;
|
||||
if (exponent_ >= log10Radix + 4) {
|
||||
// x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
|
||||
exponent_ -= 4;
|
||||
carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
|
||||
f.AdjustExponent(4);
|
||||
} else {
|
||||
// x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
|
||||
--exponent_;
|
||||
carry = MultiplyWithoutNormalization<5>();
|
||||
f.AdjustExponent(1);
|
||||
}
|
||||
RemoveLeastOrderZeroDigits();
|
||||
if (carry != 0) {
|
||||
// x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
|
||||
digit_[digits_++] = carry;
|
||||
exponent_ += log10Radix;
|
||||
}
|
||||
|
@ -321,10 +320,12 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
|
|||
// So exponent_ is now log10Radix, meaning that the
|
||||
// MSD can be taken as an integer part and transferred
|
||||
// to the binary result.
|
||||
// x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
|
||||
int guardShift{f.SetTo(digit_[--digits_])};
|
||||
// Transfer additional bits until the result is normal.
|
||||
digitLimit_ = digits_;
|
||||
while (!f.IsFull()) {
|
||||
// x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
|
||||
f.AdjustExponent(-1);
|
||||
std::uint32_t carry = MultiplyWithoutNormalization<2>();
|
||||
f.ShiftIn(carry);
|
||||
|
|
Loading…
Reference in New Issue