[flang] Improve comments, clean up a couple of things

Original-commit: flang-compiler/f18@8d94d643b6
Reviewed-on: https://github.com/flang-compiler/f18/pull/671
Tree-same-pre-rewrite: false
This commit is contained in:
peter klausler 2019-08-21 11:46:46 -07:00
parent 1502542fe7
commit b4f34801c6
4 changed files with 89 additions and 70 deletions

View File

@ -16,10 +16,12 @@
#define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_
// This is a helper class for use in floating-point conversions // This is a helper class for use in floating-point conversions
// to and from decimal representations. It holds a multiple-precision // between binary and decimal representations. It holds a multiple-precision
// integer value using digits of a radix that is a large even power of ten. // integer value using digits of a radix that is a large even power of ten
// The digits are accompanied by a signed exponent that denotes multiplication // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied
// by a power of ten. // by a signed exponent that denotes multiplication by a power of ten.
// The effective radix point is to the right of the digits (i.e., they do
// not represent a fraction).
// //
// The operations supported by this class are limited to those required // The operations supported by this class are limited to those required
// for conversions between binary and decimal representations; it is not // for conversions between binary and decimal representations; it is not
@ -63,6 +65,8 @@ private:
// in a subnormal IEEE floating-point number. // in a subnormal IEEE floating-point number.
static constexpr int minLog2AnyBit{ static constexpr int minLog2AnyBit{
-int{Real::exponentBias} - Real::precision}; -int{Real::exponentBias} - Real::precision};
// The number of Digits needed to represent the smallest subnormal.
static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix}; static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
public: public:
@ -84,18 +88,31 @@ public:
// Converts decimal floating-point to binary. // Converts decimal floating-point to binary.
ConversionToBinaryResult<PREC> ConvertToBinary(); ConversionToBinaryResult<PREC> ConvertToBinary();
// Parses and converts to binary. Also handles "NaN" & "Inf". // Parses and converts to binary. Handles leading spaces,
// The reference argument is a pointer that is left pointing to // "NaN", & optionally-signed "Inf". Does not skip internal
// the first character that wasn't included. // spaces.
// The argument is a reference to a pointer that is left
// pointing to the first character that wasn't parsed.
ConversionToBinaryResult<PREC> ConvertToBinary(const char *&); ConversionToBinaryResult<PREC> ConvertToBinary(const char *&);
// Formats a decimal floating-point number. // Formats a decimal floating-point number to a user buffer.
// May emit "NaN" or "Inf", or a possibly-signed integer.
// No decimal point is written, but if it were, it would be
// after the last digit; the effective decimal exponent is
// returned as part of the result structure so that it can be
// formatted by the client.
ConversionToDecimalResult ConvertToDecimal( ConversionToDecimalResult ConvertToDecimal(
char *, std::size_t, enum DecimalConversionFlags, int digits) const; char *, std::size_t, enum DecimalConversionFlags, int digits) const;
// Discard decimal digits not needed to distinguish this value // Discard decimal digits not needed to distinguish this value
// from the decimal encodings of two others (viz., the nearest binary // from the decimal encodings of two others (viz., the nearest binary
// floating-point numbers immediately below and above this one). // floating-point numbers immediately below and above this one).
// The last decimal digit may not be uniquely determined in all
// cases, and will be the mean value when that is so (e.g., if
// last decimal digit values 6-8 would all work, it'll be a 7).
// This minimization necessarily assumes that the value will be
// emitted and read back into the same (or less precise) format
// with default rounding to the nearest value.
void Minimize( void Minimize(
BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
@ -109,6 +126,7 @@ private:
} }
bool IsZero() const { bool IsZero() const {
// Don't assume normalization.
for (int j{0}; j < digits_; ++j) { for (int j{0}; j < digits_; ++j) {
if (digit_[j] != 0) { if (digit_[j] != 0) {
return false; return false;
@ -117,8 +135,6 @@ private:
return true; return true;
} }
bool IsOdd() const { return digits_ > 0 && (digit_[0] & 1); }
// Predicate: true when 10*value would cause a carry. // Predicate: true when 10*value would cause a carry.
// (When this happens during decimal-to-binary conversion, // (When this happens during decimal-to-binary conversion,
// there are more digits in the input string than can be // there are more digits in the input string than can be
@ -128,7 +144,7 @@ private:
} }
// Set to an unsigned integer value. // Set to an unsigned integer value.
// Returns any remainder (usually zero). // Returns any remainder.
template<typename UINT> UINT SetTo(UINT n) { template<typename UINT> UINT SetTo(UINT n) {
static_assert( static_assert(
std::is_same_v<UINT, __uint128_t> || std::is_unsigned_v<UINT>); std::is_same_v<UINT, __uint128_t> || std::is_unsigned_v<UINT>);
@ -186,7 +202,7 @@ private:
} }
// This limited divisibility test only works for even divisors of the radix, // This limited divisibility test only works for even divisors of the radix,
// which is fine since it's only used with 2 and 5. // which is fine since it's only ever used with 2 and 5.
template<int N> bool IsDivisibleBy() const { template<int N> bool IsDivisibleBy() const {
static_assert(N > 1 && radix % N == 0, "bad modulus"); static_assert(N > 1 && radix % N == 0, "bad modulus");
return digits_ == 0 || (digit_[0] % N) == 0; return digits_ == 0 || (digit_[0] % N) == 0;
@ -195,7 +211,6 @@ private:
template<unsigned DIVISOR> int DivideBy() { template<unsigned DIVISOR> int DivideBy() {
Digit remainder{0}; Digit remainder{0};
for (int j{digits_ - 1}; j >= 0; --j) { for (int j{digits_ - 1}; j >= 0; --j) {
// N.B. Because DIVISOR is a constant, these operations should be cheap.
Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])}; Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
Digit nrem{digit_[j] - DIVISOR * q}; Digit nrem{digit_[j] - DIVISOR * q};
digit_[j] = q + (radix / DIVISOR) * remainder; digit_[j] = q + (radix / DIVISOR) * remainder;
@ -273,31 +288,6 @@ private:
} }
} }
void LoseLeastSignificantDigit() {
if (digits_ >= 2) {
Digit LSD{digit_[0]};
for (int j{0}; j < digits_ - 1; ++j) {
digit_[j] = digit_[j + 1];
}
digit_[digits_ - 1] = 0;
exponent_ += log10Radix;
bool incr{false};
switch (rounding_) {
case RoundNearest:
case RoundDefault:
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
break;
case RoundUp: incr = LSD > 0 && !isNegative_; break;
case RoundDown: incr = LSD > 0 && isNegative_; break;
case RoundToZero: break;
case RoundCompatible: incr = LSD >= radix / 2; break;
}
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
digit_[j] = 0;
}
}
}
template<int N> void MultiplyByRounded() { template<int N> void MultiplyByRounded() {
if (int carry{MultiplyBy<N>()}) { if (int carry{MultiplyBy<N>()}) {
LoseLeastSignificantDigit(); LoseLeastSignificantDigit();
@ -305,6 +295,8 @@ private:
} }
} }
void LoseLeastSignificantDigit(); // with rounding
// Adds another number and then divides by two. // Adds another number and then divides by two.
// Assumes same exponent and sign. // Assumes same exponent and sign.
// Returns true when the result has effectively been rounded down. // Returns true when the result has effectively been rounded down.

View File

@ -15,7 +15,8 @@
#ifndef FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_ #ifndef FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
#define FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_ #define FORTRAN_DECIMAL_BINARY_FLOATING_POINT_H_
// Access the fields of an IEEE-754 binary floating-point value. // Access and manipulate the fields of an IEEE-754 binary
// floating-point value via a generalized template.
#include <cinttypes> #include <cinttypes>
#include <climits> #include <climits>
@ -33,7 +34,6 @@ template<int BITS> struct HostUnsignedIntTypeHelper {
template<int BITS> template<int BITS>
using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type; using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type;
namespace {
static constexpr int BitsForPrecision(int prec) { static constexpr int BitsForPrecision(int prec) {
switch (prec) { switch (prec) {
case 8: return 16; case 8: return 16;
@ -45,7 +45,6 @@ static constexpr int BitsForPrecision(int prec) {
default: return -1; default: return -1;
} }
} }
}
template<int PRECISION> struct BinaryFloatingPointNumber { template<int PRECISION> struct BinaryFloatingPointNumber {
static constexpr int precision{PRECISION}; static constexpr int precision{PRECISION};

View File

@ -294,6 +294,33 @@ void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Minimize(
Normalize(); Normalize();
} }
// Discards the least significant big-radix Digit and rounds what remains
// according to rounding_.  Nothing happens unless at least two Digits are
// in use.
template<int PREC, int LOG10RADIX>
void BigRadixFloatingPointNumber<PREC,
LOG10RADIX>::LoseLeastSignificantDigit() {
if (digits_ >= 2) {
// Save the Digit being dropped; it alone decides the rounding below.
Digit LSD{digit_[0]};
// Slide every remaining Digit down one position.
for (int j{0}; j < digits_ - 1; ++j) {
digit_[j] = digit_[j + 1];
}
// The vacated top position is cleared; digits_ itself is not changed.
digit_[digits_ - 1] = 0;
// One whole Digit was removed, so the decimal exponent grows by log10Radix.
exponent_ += log10Radix;
bool incr{false};
switch (rounding_) {
case RoundNearest:
case RoundDefault:
// Above the halfway point always increments; an exact half increments
// only when the new lowest Digit is odd (ties go to even).
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
break;
// Increment only when something nonzero was dropped from a non-negative value.
case RoundUp: incr = LSD > 0 && !isNegative_; break;
// Increment only when something nonzero was dropped from a negative value.
case RoundDown: incr = LSD > 0 && isNegative_; break;
// Truncation: never increment.
case RoundToZero: break;
// Increment at or above the halfway point (an exact half rounds up in magnitude).
case RoundCompatible: incr = LSD >= radix / 2; break;
}
// Apply the increment (if any) to the lowest Digit; a Digit that reaches
// radix wraps to zero and the loop carries into the next one.
// NOTE(review): there is no explicit bound on j; presumably termination
// relies on the top Digit having just been zeroed -- confirm that all
// Digits are below radix on entry.
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
digit_[j] = 0;
}
}
}
template<int PREC> template<int PREC>
ConversionToDecimalResult ConvertToDecimal(char *buffer, size_t size, ConversionToDecimalResult ConvertToDecimal(char *buffer, size_t size,
enum DecimalConversionFlags flags, int digits, enum DecimalConversionFlags flags, int digits,

View File

@ -50,35 +50,37 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
if (q == start || (q == start + 1 && *start == '.')) { if (q == start || (q == start + 1 && *start == '.')) {
return false; // require at least one digit return false; // require at least one digit
} }
const char *d{q}; // There's a valid number here; set the reference argument to point to
// the first character afterward.
p = q;
// Strip off trailing zeroes // Strip off trailing zeroes
if (point != nullptr) { if (point != nullptr) {
while (d > firstDigit && d[-1] == '0') { while (q > firstDigit && q[-1] == '0') {
--d; --q;
} }
if (d[-1] == '.') { if (q[-1] == '.') {
point = nullptr; point = nullptr;
--d; --q;
} }
} }
if (point == nullptr) { if (point == nullptr) {
while (d > firstDigit && d[-1] == '0') { while (q > firstDigit && q[-1] == '0') {
--d; --q;
++exponent_; ++exponent_;
} }
} }
if (d == firstDigit) { if (q == firstDigit) {
exponent_ = 0; // all zeros exponent_ = 0; // all zeros
} }
if (point != nullptr) { if (point != nullptr) {
exponent_ -= static_cast<int>(d - point - 1); exponent_ -= static_cast<int>(q - point - 1);
} }
// Trim any excess digits // Trim any excess digits
const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)}; const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
if (d > limit) { if (q > limit) {
inexact = true; inexact = true;
while (d-- > limit) { while (q-- > limit) {
if (*d == '.') { if (*q == '.') {
point = nullptr; point = nullptr;
--limit; --limit;
} else if (point == nullptr) { } else if (point == nullptr) {
@ -87,18 +89,19 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
} }
} }
// Rack the decimal digits up into big Digits. // Rack the decimal digits up into big Digits.
for (auto times{radix}; d-- > firstDigit;) { for (auto times{radix}; q-- > firstDigit;) {
if (*d != '.') { if (*q != '.') {
if (times == radix) { if (times == radix) {
digit_[digits_++] = *d - '0'; digit_[digits_++] = *q - '0';
times = 10; times = 10;
} else { } else {
digit_[digits_ - 1] += times * (*d - '0'); digit_[digits_ - 1] += times * (*q - '0');
times *= 10; times *= 10;
} }
} }
} }
// Look for an optional exponent field.
q = p;
switch (*q) { switch (*q) {
case 'e': case 'e':
case 'E': case 'E':
@ -115,6 +118,7 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
for (int j{0}; j < 8 && *q >= '0' && *q <= '9'; ++j) { for (int j{0}; j < 8 && *q >= '0' && *q <= '9'; ++j) {
expo = 10 * expo + *q++ - '0'; expo = 10 * expo + *q++ - '0';
} }
p = q; // exponent was valid
if (negExpo) { if (negExpo) {
exponent_ -= expo; exponent_ -= expo;
} else { } else {
@ -124,7 +128,6 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
} break; } break;
default: break; default: break;
} }
p = q;
return true; return true;
} }
@ -271,28 +274,21 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
} }
} }
// The value is not zero. // The value is not zero.
// x = D. * 10.**E
IntermediateFloat<PREC> f; IntermediateFloat<PREC> f;
#if 0 // actually a small loss
// Make the value odd.
if (int trailing0s{common::TrailingZeroBitCount(digit_[0])}) {
f.AdjustExponent(trailing0s);
for (; trailing0s > log10Radix; trailing0s -= log10Radix) {
DivideByPowerOfTwo(log10Radix);
}
DivideByPowerOfTwo(trailing0s);
}
#endif
// Shift our perspective on the radix (& decimal) point so that // Shift our perspective on the radix (& decimal) point so that
// it sits to the *left* of the digits. // it sits to the *left* of the digits: i.e., x = .D * 10.**E
exponent_ += digits_ * log10Radix; exponent_ += digits_ * log10Radix;
// Apply any negative decimal exponent by multiplication // Apply any negative decimal exponent by multiplication
// by a power of two, adjusting the binary exponent to compensate. // by a power of two, adjusting the binary exponent to compensate.
while (exponent_ < log10Radix) { while (exponent_ < log10Radix) {
// x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
f.AdjustExponent(-9); f.AdjustExponent(-9);
digitLimit_ = digits_; digitLimit_ = digits_;
int carry{MultiplyWithoutNormalization<512>()}; int carry{MultiplyWithoutNormalization<512>()};
RemoveLeastOrderZeroDigits(); RemoveLeastOrderZeroDigits();
if (carry != 0) { if (carry != 0) {
// x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
digit_[digits_++] = carry; digit_[digits_++] = carry;
exponent_ += log10Radix; exponent_ += log10Radix;
} }
@ -304,16 +300,19 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
digitLimit_ = digits_; digitLimit_ = digits_;
int carry; int carry;
if (exponent_ >= log10Radix + 4) { if (exponent_ >= log10Radix + 4) {
// x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
exponent_ -= 4; exponent_ -= 4;
carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
f.AdjustExponent(4); f.AdjustExponent(4);
} else { } else {
// x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
--exponent_; --exponent_;
carry = MultiplyWithoutNormalization<5>(); carry = MultiplyWithoutNormalization<5>();
f.AdjustExponent(1); f.AdjustExponent(1);
} }
RemoveLeastOrderZeroDigits(); RemoveLeastOrderZeroDigits();
if (carry != 0) { if (carry != 0) {
// x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
digit_[digits_++] = carry; digit_[digits_++] = carry;
exponent_ += log10Radix; exponent_ += log10Radix;
} }
@ -321,10 +320,12 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
// So exponent_ is now log10Radix, meaning that the // So exponent_ is now log10Radix, meaning that the
// MSD can be taken as an integer part and transferred // MSD can be taken as an integer part and transferred
// to the binary result. // to the binary result.
// x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
int guardShift{f.SetTo(digit_[--digits_])}; int guardShift{f.SetTo(digit_[--digits_])};
// Transfer additional bits until the result is normal. // Transfer additional bits until the result is normal.
digitLimit_ = digits_; digitLimit_ = digits_;
while (!f.IsFull()) { while (!f.IsFull()) {
// x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
f.AdjustExponent(-1); f.AdjustExponent(-1);
std::uint32_t carry = MultiplyWithoutNormalization<2>(); std::uint32_t carry = MultiplyWithoutNormalization<2>();
f.ShiftIn(carry); f.ShiftIn(carry);