forked from OSchip/llvm-project
[flang] Fix x87 binary->decimal
Summary: Fix decimal formatting of 80-bit x87 values; the calculation of nearest neighbor values failed to account for the explicit most significant bit in that format. Replace MultiplyByRounded with MultiplyBy in binary->decimal conversions, since rounding won't happen and the name was misleading; then remove dead code, and migrate LoseLeastSignificantDigit() from one source file to another where it's still needed. Reviewers: tskeith, sscalpone, jdoerfert, DavidTruby Reviewed By: tskeith Subscribers: llvm-commits, flang-commits Tags: #flang, #llvm Differential Revision: https://reviews.llvm.org/D79345
This commit is contained in:
parent
ac9e8b3a7e
commit
6fec2c4402
|
@ -12,8 +12,11 @@
|
|||
#ifndef FORTRAN_COMMON_UINT128_H_
|
||||
#define FORTRAN_COMMON_UINT128_H_
|
||||
|
||||
// Define AVOID_NATIVE_UINT128_T to force the use of UnsignedInt128 below
|
||||
// instead of the C++ compiler's native 128-bit unsigned integer type, if
|
||||
// it has one.
|
||||
#ifndef AVOID_NATIVE_UINT128_T
|
||||
#define AVOID_NATIVE_UINT128_T 1 // always use this code for now for testing
|
||||
#define AVOID_NATIVE_UINT128_T 0
|
||||
#endif
|
||||
|
||||
#include "leading-zero-bit-count.h"
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
namespace Fortran::decimal {
|
||||
|
||||
template <int BINARY_PRECISION>
|
||||
struct BinaryFloatingPointNumber
|
||||
: public common::RealDetails<BINARY_PRECISION> {
|
||||
|
||||
class BinaryFloatingPointNumber : public common::RealDetails<BINARY_PRECISION> {
|
||||
public:
|
||||
using Details = common::RealDetails<BINARY_PRECISION>;
|
||||
using Details::bits;
|
||||
using Details::decimalPrecision;
|
||||
|
@ -50,21 +49,23 @@ struct BinaryFloatingPointNumber
|
|||
constexpr BinaryFloatingPointNumber &operator=(
|
||||
BinaryFloatingPointNumber &&that) = default;
|
||||
|
||||
RawType raw() const { return raw_; }
|
||||
|
||||
template <typename A> explicit constexpr BinaryFloatingPointNumber(A x) {
|
||||
static_assert(sizeof raw <= sizeof x);
|
||||
std::memcpy(reinterpret_cast<void *>(&raw),
|
||||
reinterpret_cast<const void *>(&x), sizeof raw);
|
||||
static_assert(sizeof raw_ <= sizeof x);
|
||||
std::memcpy(reinterpret_cast<void *>(&raw_),
|
||||
reinterpret_cast<const void *>(&x), sizeof raw_);
|
||||
}
|
||||
|
||||
constexpr int BiasedExponent() const {
|
||||
return static_cast<int>(
|
||||
(raw >> significandBits) & ((1 << exponentBits) - 1));
|
||||
(raw_ >> significandBits) & ((1 << exponentBits) - 1));
|
||||
}
|
||||
constexpr int UnbiasedExponent() const {
|
||||
int biased{BiasedExponent()};
|
||||
return biased - exponentBias + (biased == 0);
|
||||
}
|
||||
constexpr RawType Significand() const { return raw & significandMask; }
|
||||
constexpr RawType Significand() const { return raw_ & significandMask; }
|
||||
constexpr RawType Fraction() const {
|
||||
RawType sig{Significand()};
|
||||
if (isImplicitMSB && BiasedExponent() > 0) {
|
||||
|
@ -74,7 +75,7 @@ struct BinaryFloatingPointNumber
|
|||
}
|
||||
|
||||
constexpr bool IsZero() const {
|
||||
return (raw & ((RawType{1} << (bits - 1)) - 1)) == 0;
|
||||
return (raw_ & ((RawType{1} << (bits - 1)) - 1)) == 0;
|
||||
}
|
||||
constexpr bool IsNaN() const {
|
||||
return BiasedExponent() == maxExponent && Significand() != 0;
|
||||
|
@ -86,11 +87,39 @@ struct BinaryFloatingPointNumber
|
|||
return BiasedExponent() == maxExponent - 1 &&
|
||||
Significand() == significandMask;
|
||||
}
|
||||
constexpr bool IsNegative() const { return ((raw >> (bits - 1)) & 1) != 0; }
|
||||
constexpr bool IsNegative() const { return ((raw_ >> (bits - 1)) & 1) != 0; }
|
||||
|
||||
constexpr void Negate() { raw ^= RawType{1} << (bits - 1); }
|
||||
constexpr void Negate() { raw_ ^= RawType{1} << (bits - 1); }
|
||||
|
||||
RawType raw{0};
|
||||
// For calculating the nearest neighbors of a floating-point value
|
||||
constexpr void Previous() {
|
||||
RemoveExplicitMSB();
|
||||
--raw_;
|
||||
InsertExplicitMSB();
|
||||
}
|
||||
constexpr void Next() {
|
||||
RemoveExplicitMSB();
|
||||
++raw_;
|
||||
InsertExplicitMSB();
|
||||
}
|
||||
|
||||
private:
|
||||
constexpr void RemoveExplicitMSB() {
|
||||
if constexpr (!isImplicitMSB) {
|
||||
raw_ = (raw_ & (significandMask >> 1)) | ((raw_ & ~significandMask) >> 1);
|
||||
}
|
||||
}
|
||||
constexpr void InsertExplicitMSB() {
|
||||
if constexpr (!isImplicitMSB) {
|
||||
constexpr RawType mask{significandMask >> 1};
|
||||
raw_ = (raw_ & mask) | ((raw_ & ~mask) << 1);
|
||||
if (BiasedExponent() > 0) {
|
||||
raw_ |= RawType{1} << (significandBits - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RawType raw_{0};
|
||||
};
|
||||
} // namespace Fortran::decimal
|
||||
#endif
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "flang/Common/unsigned-const-division.h"
|
||||
#include "flang/Decimal/binary-floating-point.h"
|
||||
#include "flang/Decimal/decimal.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cinttypes>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
@ -111,6 +112,8 @@ public:
|
|||
void Minimize(
|
||||
BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
|
||||
|
||||
llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
|
||||
|
||||
private:
|
||||
BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that)
|
||||
: digits_{that.digits_}, exponent_{that.exponent_},
|
||||
|
@ -283,14 +286,6 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
template <int N> void MultiplyByRounded() {
|
||||
if (int carry{MultiplyBy<N>()}) {
|
||||
LoseLeastSignificantDigit();
|
||||
digit_[digits_ - 1] += carry;
|
||||
exponent_ += log10Radix;
|
||||
}
|
||||
}
|
||||
|
||||
void LoseLeastSignificantDigit(); // with rounding
|
||||
|
||||
void PushCarry(int carry) {
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#include "big-radix-floating-point.h"
|
||||
#include "flang/Decimal/decimal.h"
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
|
||||
namespace Fortran::decimal {
|
||||
|
||||
|
@ -54,17 +56,18 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::BigRadixFloatingPointNumber(
|
|||
++exponent_;
|
||||
}
|
||||
|
||||
int overflow{0};
|
||||
for (; twoPow >= 9; twoPow -= 9) {
|
||||
// D * 10.**E * 2.**twoPow -> (D*(2**9)) * 10.**E * 2.**(twoPow-9)
|
||||
MultiplyByRounded<512>();
|
||||
overflow |= MultiplyBy<512>();
|
||||
}
|
||||
for (; twoPow >= 3; twoPow -= 3) {
|
||||
// D * 10.**E * 2.**twoPow -> (D*(2**3)) * 10.**E * 2.**(twoPow-3)
|
||||
MultiplyByRounded<8>();
|
||||
overflow |= MultiplyBy<8>();
|
||||
}
|
||||
for (; twoPow > 0; --twoPow) {
|
||||
// D * 10.**E * 2.**twoPow -> (2*D) * 10.**E * 2.**(twoPow-1)
|
||||
MultiplyByRounded<2>();
|
||||
overflow |= MultiplyBy<2>();
|
||||
}
|
||||
|
||||
while (twoPow < 0) {
|
||||
|
@ -85,21 +88,23 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::BigRadixFloatingPointNumber(
|
|||
|
||||
for (; twoPow <= -4; twoPow += 4) {
|
||||
// D * 10.**E * 2.**twoPow -> 625D * 10.**(E-4) * 2.**(twoPow+4)
|
||||
MultiplyByRounded<(5 * 5 * 5 * 5)>();
|
||||
overflow |= MultiplyBy<(5 * 5 * 5 * 5)>();
|
||||
exponent_ -= 4;
|
||||
}
|
||||
if (twoPow <= -2) {
|
||||
// D * 10.**E * 2.**twoPow -> 25D * 10.**(E-2) * 2.**(twoPow+2)
|
||||
MultiplyByRounded<25>();
|
||||
overflow |= MultiplyBy<5 * 5>();
|
||||
twoPow += 2;
|
||||
exponent_ -= 2;
|
||||
}
|
||||
for (; twoPow < 0; ++twoPow) {
|
||||
// D * 10.**E * 2.**twoPow -> 5D * 10.**(E-1) * 2.**(twoPow+1)
|
||||
MultiplyByRounded<5>();
|
||||
overflow |= MultiplyBy<5>();
|
||||
--exponent_;
|
||||
}
|
||||
|
||||
assert(overflow == 0);
|
||||
|
||||
// twoPow == 0, the decimal encoding is complete.
|
||||
Normalize();
|
||||
}
|
||||
|
@ -299,37 +304,6 @@ void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Minimize(
|
|||
Normalize();
|
||||
}
|
||||
|
||||
template <int PREC, int LOG10RADIX>
|
||||
void BigRadixFloatingPointNumber<PREC,
|
||||
LOG10RADIX>::LoseLeastSignificantDigit() {
|
||||
Digit LSD{digit_[0]};
|
||||
for (int j{0}; j < digits_ - 1; ++j) {
|
||||
digit_[j] = digit_[j + 1];
|
||||
}
|
||||
digit_[digits_ - 1] = 0;
|
||||
bool incr{false};
|
||||
switch (rounding_) {
|
||||
case RoundNearest:
|
||||
case RoundDefault:
|
||||
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
|
||||
break;
|
||||
case RoundUp:
|
||||
incr = LSD > 0 && !isNegative_;
|
||||
break;
|
||||
case RoundDown:
|
||||
incr = LSD > 0 && isNegative_;
|
||||
break;
|
||||
case RoundToZero:
|
||||
break;
|
||||
case RoundCompatible:
|
||||
incr = LSD >= radix / 2;
|
||||
break;
|
||||
}
|
||||
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
|
||||
digit_[j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <int PREC>
|
||||
ConversionToDecimalResult ConvertToDecimal(char *buffer, std::size_t size,
|
||||
enum DecimalConversionFlags flags, int digits,
|
||||
|
@ -358,12 +332,13 @@ ConversionToDecimalResult ConvertToDecimal(char *buffer, std::size_t size,
|
|||
// decimal sequence in that range.
|
||||
using Binary = typename Big::Real;
|
||||
Binary less{x};
|
||||
--less.raw;
|
||||
less.Previous();
|
||||
Binary more{x};
|
||||
if (!x.IsMaximalFiniteMagnitude()) {
|
||||
++more.raw;
|
||||
more.Next();
|
||||
}
|
||||
number.Minimize(Big{less, rounding}, Big{more, rounding});
|
||||
} else {
|
||||
}
|
||||
return number.ConvertToDecimal(buffer, size, flags, digits);
|
||||
}
|
||||
|
@ -412,4 +387,22 @@ ConversionToDecimalResult ConvertLongDoubleToDecimal(char *buffer,
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <int PREC, int LOG10RADIX>
|
||||
llvm::raw_ostream &BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Dump(
|
||||
llvm::raw_ostream &o) const {
|
||||
if (isNegative_) {
|
||||
o << '-';
|
||||
}
|
||||
o << "10**(" << exponent_ << ") * ...\n";
|
||||
for (int j{digits_}; --j >= 0;) {
|
||||
std::string str{std::to_string(digit_[j])};
|
||||
o << std::string(20 - str.size(), ' ') << str << " [" << j << ']';
|
||||
if (j + 1 == digitLimit_) {
|
||||
o << " (limit)";
|
||||
}
|
||||
o << '\n';
|
||||
}
|
||||
return o;
|
||||
}
|
||||
} // namespace Fortran::decimal
|
||||
|
|
|
@ -139,6 +139,37 @@ bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
|
|||
return true;
|
||||
}
|
||||
|
||||
template <int PREC, int LOG10RADIX>
|
||||
void BigRadixFloatingPointNumber<PREC,
|
||||
LOG10RADIX>::LoseLeastSignificantDigit() {
|
||||
Digit LSD{digit_[0]};
|
||||
for (int j{0}; j < digits_ - 1; ++j) {
|
||||
digit_[j] = digit_[j + 1];
|
||||
}
|
||||
digit_[digits_ - 1] = 0;
|
||||
bool incr{false};
|
||||
switch (rounding_) {
|
||||
case RoundNearest:
|
||||
case RoundDefault:
|
||||
incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
|
||||
break;
|
||||
case RoundUp:
|
||||
incr = LSD > 0 && !isNegative_;
|
||||
break;
|
||||
case RoundDown:
|
||||
incr = LSD > 0 && isNegative_;
|
||||
break;
|
||||
case RoundToZero:
|
||||
break;
|
||||
case RoundCompatible:
|
||||
incr = LSD >= radix / 2;
|
||||
break;
|
||||
}
|
||||
for (int j{0}; (digit_[j] += incr) == radix; ++j) {
|
||||
digit_[j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// This local utility class represents an unrounded nonnegative
|
||||
// binary floating-point value with an unbiased (i.e., signed)
|
||||
// binary exponent, an integer value (not a fraction) with an implied
|
||||
|
|
|
@ -396,8 +396,8 @@ bool RealOutputEditing<binaryPrecision>::Edit(const DataEdit &edit) {
|
|||
case 'B':
|
||||
case 'O':
|
||||
case 'Z':
|
||||
return EditIntegerOutput(
|
||||
io_, edit, decimal::BinaryFloatingPointNumber<binaryPrecision>{x_}.raw);
|
||||
return EditIntegerOutput(io_, edit,
|
||||
decimal::BinaryFloatingPointNumber<binaryPrecision>{x_}.raw());
|
||||
case 'G':
|
||||
return Edit(EditForGOutput(edit));
|
||||
default:
|
||||
|
|
Loading…
Reference in New Issue