forked from OSchip/llvm-project
[libc] Add implementations of ldexp[f|l].
The rounding behavior of NormalFloat to float format has been changed to round to nearest. Also, a bug in NormalFloat to subnormal number conversion has been fixed. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D91591
This commit is contained in:
parent
8e923ec2a8
commit
bb8f2585c6
|
@ -71,6 +71,9 @@ set(TARGET_LIBM_ENTRYPOINTS
|
|||
libc.src.math.ilogb
|
||||
libc.src.math.ilogbf
|
||||
libc.src.math.ilogbl
|
||||
libc.src.math.ldexp
|
||||
libc.src.math.ldexpf
|
||||
libc.src.math.ldexpl
|
||||
libc.src.math.logb
|
||||
libc.src.math.logbf
|
||||
libc.src.math.logbl
|
||||
|
|
|
@ -104,6 +104,9 @@ set(TARGET_LIBM_ENTRYPOINTS
|
|||
libc.src.math.ilogb
|
||||
libc.src.math.ilogbf
|
||||
libc.src.math.ilogbl
|
||||
libc.src.math.ldexp
|
||||
libc.src.math.ldexpf
|
||||
libc.src.math.ldexpl
|
||||
libc.src.math.logb
|
||||
libc.src.math.logbf
|
||||
libc.src.math.logbl
|
||||
|
|
|
@ -284,6 +284,10 @@ def StdC : StandardSpec<"stdc"> {
|
|||
FunctionSpec<"ilogbf", RetValSpec<IntType>, [ArgSpec<FloatType>]>,
|
||||
FunctionSpec<"ilogbl", RetValSpec<IntType>, [ArgSpec<LongDoubleType>]>,
|
||||
|
||||
FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
|
||||
FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
|
||||
FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
|
||||
|
||||
FunctionSpec<"logb", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
|
||||
FunctionSpec<"logbf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
|
||||
FunctionSpec<"logbl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
|
||||
|
|
|
@ -378,6 +378,42 @@ add_entrypoint_object(
|
|||
-O2
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
ldexp
|
||||
SRCS
|
||||
ldexp.cpp
|
||||
HDRS
|
||||
ldexp.h
|
||||
DEPENDS
|
||||
libc.utils.FPUtil.fputil
|
||||
COMPILE_OPTIONS
|
||||
-O2
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
ldexpf
|
||||
SRCS
|
||||
ldexpf.cpp
|
||||
HDRS
|
||||
ldexpf.h
|
||||
DEPENDS
|
||||
libc.utils.FPUtil.fputil
|
||||
COMPILE_OPTIONS
|
||||
-O2
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
ldexpl
|
||||
SRCS
|
||||
ldexpl.cpp
|
||||
HDRS
|
||||
ldexpl.h
|
||||
DEPENDS
|
||||
libc.utils.FPUtil.fputil
|
||||
COMPILE_OPTIONS
|
||||
-O2
|
||||
)
|
||||
|
||||
add_entrypoint_object(
|
||||
logb
|
||||
SRCS
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
//===-- Implementation of ldexp function ----------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/__support/common.h"
|
||||
#include "utils/FPUtil/ManipulationFunctions.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
double LLVM_LIBC_ENTRYPOINT(ldexp)(double x, int exp) {
|
||||
return fputil::ldexp(x, exp);
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
|
@ -0,0 +1,18 @@
|
|||
//===-- Implementation header for ldexp -------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_MATH_LDEXP_H
|
||||
#define LLVM_LIBC_SRC_MATH_LDEXP_H
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
double ldexp(double x, int exp);
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#endif // LLVM_LIBC_SRC_MATH_LDEXP_H
|
|
@ -0,0 +1,18 @@
|
|||
//===-- Implementation of ldexpf function ---------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/__support/common.h"
|
||||
#include "utils/FPUtil/ManipulationFunctions.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
float LLVM_LIBC_ENTRYPOINT(ldexpf)(float x, int exp) {
|
||||
return fputil::ldexp(x, exp);
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
|
@ -0,0 +1,18 @@
|
|||
//===-- Implementation header for ldexpf ------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_MATH_LDEXPF_H
|
||||
#define LLVM_LIBC_SRC_MATH_LDEXPF_H
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
float ldexpf(float x, int exp);
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#endif // LLVM_LIBC_SRC_MATH_LDEXPF_H
|
|
@ -0,0 +1,18 @@
|
|||
//===-- Implementation of ldexpl function ---------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/__support/common.h"
|
||||
#include "utils/FPUtil/ManipulationFunctions.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
long double LLVM_LIBC_ENTRYPOINT(ldexpl)(long double x, int exp) {
|
||||
return fputil::ldexp(x, exp);
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
|
@ -0,0 +1,18 @@
|
|||
//===-- Implementation header for ldexpl ------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_MATH_LDEXPL_H
#define LLVM_LIBC_SRC_MATH_LDEXPL_H

namespace __llvm_libc {

// long double flavor of ldexp: scales |x| by 2 raised to the power |exp|.
long double ldexpl(long double x, int exp);

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_MATH_LDEXPL_H
|
|
@ -412,6 +412,48 @@ add_fp_unittest(
|
|||
libc.utils.FPUtil.fputil
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
ldexp_test
|
||||
SUITE
|
||||
libc_math_unittests
|
||||
SRCS
|
||||
ldexp_test.cpp
|
||||
HDRS
|
||||
LdExpTest.h
|
||||
DEPENDS
|
||||
libc.include.math
|
||||
libc.src.math.ldexp
|
||||
libc.utils.FPUtil.fputil
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
ldexpf_test
|
||||
SUITE
|
||||
libc_math_unittests
|
||||
SRCS
|
||||
ldexpf_test.cpp
|
||||
HDRS
|
||||
LdExpTest.h
|
||||
DEPENDS
|
||||
libc.include.math
|
||||
libc.src.math.ldexpf
|
||||
libc.utils.FPUtil.fputil
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
ldexpl_test
|
||||
SUITE
|
||||
libc_math_unittests
|
||||
SRCS
|
||||
ldexpl_test.cpp
|
||||
HDRS
|
||||
LdExpTest.h
|
||||
DEPENDS
|
||||
libc.include.math
|
||||
libc.src.math.ldexpl
|
||||
libc.utils.FPUtil.fputil
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
logb_test
|
||||
SUITE
|
||||
|
|
|
@ -0,0 +1,131 @@
|
|||
//===-- Utility class to test different flavors of ldexp --------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
|
||||
#define LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
|
||||
|
||||
#include "utils/FPUtil/FPBits.h"
|
||||
#include "utils/FPUtil/NormalFloat.h"
|
||||
#include "utils/FPUtil/TestHelpers.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
template <typename T>
|
||||
class LdExpTestTemplate : public __llvm_libc::testing::Test {
|
||||
using FPBits = __llvm_libc::fputil::FPBits<T>;
|
||||
using NormalFloat = __llvm_libc::fputil::NormalFloat<T>;
|
||||
using UIntType = typename FPBits::UIntType;
|
||||
static constexpr UIntType mantissaWidth =
|
||||
__llvm_libc::fputil::MantissaWidth<T>::value;
|
||||
// A normalized mantissa to be used with tests.
|
||||
static constexpr UIntType mantissa = NormalFloat::one + 0x1234;
|
||||
|
||||
const T zero = __llvm_libc::fputil::FPBits<T>::zero();
|
||||
const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
|
||||
const T inf = __llvm_libc::fputil::FPBits<T>::inf();
|
||||
const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
|
||||
const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
|
||||
|
||||
public:
|
||||
typedef T (*LdExpFunc)(T, int);
|
||||
|
||||
void testSpecialNumbers(LdExpFunc func) {
|
||||
int expArray[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX};
|
||||
for (int exp : expArray) {
|
||||
ASSERT_FP_EQ(zero, func(zero, exp));
|
||||
ASSERT_FP_EQ(negZero, func(negZero, exp));
|
||||
ASSERT_FP_EQ(inf, func(inf, exp));
|
||||
ASSERT_FP_EQ(negInf, func(negInf, exp));
|
||||
ASSERT_NE(isnan(func(nan, exp)), 0);
|
||||
}
|
||||
}
|
||||
|
||||
void testPowersOfTwo(LdExpFunc func) {
|
||||
int32_t expArray[5] = {1, 2, 3, 4, 5};
|
||||
int32_t valArray[6] = {1, 2, 4, 8, 16, 32};
|
||||
for (int32_t exp : expArray) {
|
||||
for (int32_t val : valArray) {
|
||||
ASSERT_FP_EQ(T(val << exp), func(T(val), exp));
|
||||
ASSERT_FP_EQ(T(-1 * (val << exp)), func(T(-val), exp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void testOverflow(LdExpFunc func) {
|
||||
NormalFloat x(FPBits::maxExponent - 10, NormalFloat::one + 0xF00BA, 0);
|
||||
for (int32_t exp = 10; exp < 100; ++exp) {
|
||||
ASSERT_FP_EQ(inf, func(T(x), exp));
|
||||
ASSERT_FP_EQ(negInf, func(-T(x), exp));
|
||||
}
|
||||
}
|
||||
|
||||
void testUnderflowToZeroOnNormal(LdExpFunc func) {
|
||||
// In this test, we pass a normal number to func and expect zero
|
||||
// to be returned due to underflow.
|
||||
int32_t baseExponent = FPBits::exponentBias + mantissaWidth;
|
||||
int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3,
|
||||
baseExponent + 2, baseExponent + 1};
|
||||
T x = NormalFloat(0, mantissa, 0);
|
||||
for (int32_t exp : expArray) {
|
||||
ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : negZero);
|
||||
}
|
||||
}
|
||||
|
||||
void testUnderflowToZeroOnSubnormal(LdExpFunc func) {
|
||||
// In this test, we pass a subnormal number to func and expect zero
|
||||
// to be returned due to underflow.
|
||||
int32_t baseExponent = FPBits::exponentBias + mantissaWidth;
|
||||
int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3,
|
||||
baseExponent + 2, baseExponent + 1};
|
||||
T x = NormalFloat(-FPBits::exponentBias, mantissa, 0);
|
||||
for (int32_t exp : expArray) {
|
||||
ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : negZero);
|
||||
}
|
||||
}
|
||||
|
||||
void testNormalOperation(LdExpFunc func) {
|
||||
T valArray[] = {
|
||||
// Normal numbers
|
||||
NormalFloat(100, mantissa, 0), NormalFloat(-100, mantissa, 0),
|
||||
NormalFloat(100, mantissa, 1), NormalFloat(-100, mantissa, 1),
|
||||
// Subnormal numbers
|
||||
NormalFloat(-FPBits::exponentBias, mantissa, 0),
|
||||
NormalFloat(-FPBits::exponentBias, mantissa, 1)};
|
||||
for (int32_t exp = 0; exp <= static_cast<int32_t>(mantissaWidth); ++exp) {
|
||||
for (T x : valArray) {
|
||||
// We compare the result of ldexp with the result
|
||||
// of the native multiplication/division instruction.
|
||||
ASSERT_FP_EQ(func(x, exp), x * (UIntType(1) << exp));
|
||||
ASSERT_FP_EQ(func(x, -exp), x / (UIntType(1) << exp));
|
||||
}
|
||||
}
|
||||
|
||||
// Normal which trigger mantissa overflow.
|
||||
T x = NormalFloat(-FPBits::exponentBias + 1, 2 * NormalFloat::one - 1, 0);
|
||||
ASSERT_FP_EQ(func(x, -1), x / 2);
|
||||
ASSERT_FP_EQ(func(-x, -1), -x / 2);
|
||||
}
|
||||
};
|
||||
|
||||
#define LIST_LDEXP_TESTS(T, func) \
|
||||
using LdExpTest = LdExpTestTemplate<T>; \
|
||||
TEST_F(LdExpTest, SpecialNumbers) { testSpecialNumbers(&func); } \
|
||||
TEST_F(LdExpTest, PowersOfTwo) { testPowersOfTwo(&func); } \
|
||||
TEST_F(LdExpTest, OverFlow) { testOverflow(&func); } \
|
||||
TEST_F(LdExpTest, UnderflowToZeroOnNormal) { \
|
||||
testUnderflowToZeroOnNormal(&func); \
|
||||
} \
|
||||
TEST_F(LdExpTest, UnderflowToZeroOnSubnormal) { \
|
||||
testUnderflowToZeroOnSubnormal(&func); \
|
||||
} \
|
||||
TEST_F(LdExpTest, NormalOperation) { testNormalOperation(&func); }
|
||||
|
||||
#endif // LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
|
|
@ -0,0 +1,21 @@
|
|||
//===-- Unittests for ldexp -----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "LdExpTest.h"
|
||||
|
||||
#include "include/math.h"
|
||||
#include "src/math/ldexp.h"
|
||||
#include "utils/CPP/Functional.h"
|
||||
#include "utils/FPUtil/FPBits.h"
|
||||
#include "utils/FPUtil/ManipulationFunctions.h"
|
||||
#include "utils/FPUtil/TestHelpers.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
LIST_LDEXP_TESTS(double, __llvm_libc::ldexp)
|
|
@ -0,0 +1,21 @@
|
|||
//===-- Unittests for ldexpf ----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "LdExpTest.h"
|
||||
|
||||
#include "include/math.h"
|
||||
#include "src/math/ldexpf.h"
|
||||
#include "utils/CPP/Functional.h"
|
||||
#include "utils/FPUtil/FPBits.h"
|
||||
#include "utils/FPUtil/ManipulationFunctions.h"
|
||||
#include "utils/FPUtil/TestHelpers.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
LIST_LDEXP_TESTS(float, __llvm_libc::ldexpf)
|
|
@ -0,0 +1,21 @@
|
|||
//===-- Unittests for ldexpl ----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "LdExpTest.h"
|
||||
|
||||
#include "include/math.h"
|
||||
#include "src/math/ldexpl.h"
|
||||
#include "utils/CPP/Functional.h"
|
||||
#include "utils/FPUtil/FPBits.h"
|
||||
#include "utils/FPUtil/ManipulationFunctions.h"
|
||||
#include "utils/FPUtil/TestHelpers.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
LIST_LDEXP_TESTS(long double, __llvm_libc::ldexpl)
|
|
@ -116,6 +116,30 @@ static inline T logb(T x) {
|
|||
return normal.exponent;
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
|
||||
static inline T ldexp(T x, int exp) {
|
||||
FPBits<T> bits(x);
|
||||
if (bits.isZero() || bits.isInfOrNaN() || exp == 0)
|
||||
return x;
|
||||
|
||||
// NormalFloat uses int32_t to store the true exponent value. We should ensure
|
||||
// that adding |exp| to it does not lead to integer rollover. But, we |exp|
|
||||
// value is larger the exponent range for type T, then we can return infinity
|
||||
// early.
|
||||
if (exp > FPBits<T>::maxExponent)
|
||||
return bits.sign ? FPBits<T>::negInf() : FPBits<T>::inf();
|
||||
|
||||
// Similarly on the negative side.
|
||||
if (exp < -FPBits<T>::maxExponent)
|
||||
return bits.sign ? FPBits<T>::negZero() : FPBits<T>::zero();
|
||||
|
||||
// For all other values, NormalFloat to T conversion handles it the right way.
|
||||
NormalFloat<T> normal(bits);
|
||||
normal.exponent += exp;
|
||||
return normal;
|
||||
}
|
||||
|
||||
} // namespace fputil
|
||||
} // namespace __llvm_libc
|
||||
|
||||
|
|
|
@ -93,30 +93,47 @@ template <typename T> struct NormalFloat {
|
|||
// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
|
||||
constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2;
|
||||
if (biasedExponent > maxExponentValue) {
|
||||
// TODO: Should infinity with the correct sign be returned?
|
||||
return FPBits<T>::buildNaN(1);
|
||||
return sign ? FPBits<T>::negInf() : FPBits<T>::inf();
|
||||
}
|
||||
|
||||
FPBits<T> result(T(0.0));
|
||||
result.sign = sign;
|
||||
|
||||
constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1;
|
||||
if (exponent < subnormalExponent) {
|
||||
unsigned shift = subnormalExponent - exponent;
|
||||
if (shift <= MantissaWidth<T>::value) {
|
||||
// Since exponent < subnormalExponent, shift is strictly greater than
|
||||
// zero.
|
||||
if (shift <= MantissaWidth<T>::value + 1) {
|
||||
// Generate a subnormal number. Might lead to loss of precision.
|
||||
// We round to nearest and round halfway cases to even.
|
||||
const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
|
||||
const UIntType shiftOutValue = mantissa & shiftOutMask;
|
||||
const UIntType halfwayValue = UIntType(1) << (shift - 1);
|
||||
result.exponent = 0;
|
||||
result.mantissa = mantissa >> shift;
|
||||
result.sign = sign;
|
||||
UIntType newMantissa = result.mantissa;
|
||||
if (shiftOutValue > halfwayValue) {
|
||||
newMantissa += 1;
|
||||
} else if (shiftOutValue == halfwayValue) {
|
||||
// Round to even.
|
||||
if (result.mantissa & 0x1)
|
||||
newMantissa += 1;
|
||||
}
|
||||
result.mantissa = newMantissa;
|
||||
// Adding 1 to mantissa can lead to overflow. This can only happen if
|
||||
// mantissa was all ones (0b111..11). For such a case, we will carry
|
||||
// the overflow into the exponent.
|
||||
if (newMantissa == one)
|
||||
result.exponent = 1;
|
||||
return result;
|
||||
} else {
|
||||
// TODO: Should zero with the correct sign be returned?
|
||||
return FPBits<T>::buildNaN(1);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.exponent = exponent + FPBits<T>::exponentBias;
|
||||
result.mantissa = mantissa;
|
||||
result.sign = sign;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -192,32 +209,50 @@ template <> inline NormalFloat<long double>::operator long double() const {
|
|||
// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
|
||||
constexpr int maxExponentValue = (1 << ExponentWidth<long double>::value) - 2;
|
||||
if (biasedExponent > maxExponentValue) {
|
||||
// TODO: Should infinity with the correct sign be returned?
|
||||
return FPBits<long double>::buildNaN(1);
|
||||
return sign ? FPBits<long double>::negInf() : FPBits<long double>::inf();
|
||||
}
|
||||
|
||||
FPBits<long double> result(0.0l);
|
||||
result.sign = sign;
|
||||
|
||||
constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1;
|
||||
if (exponent < subnormalExponent) {
|
||||
unsigned shift = subnormalExponent - exponent;
|
||||
if (shift <= MantissaWidth<long double>::value) {
|
||||
if (shift <= MantissaWidth<long double>::value + 1) {
|
||||
// Generate a subnormal number. Might lead to loss of precision.
|
||||
// We round to nearest and round halfway cases to even.
|
||||
const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
|
||||
const UIntType shiftOutValue = mantissa & shiftOutMask;
|
||||
const UIntType halfwayValue = UIntType(1) << (shift - 1);
|
||||
result.exponent = 0;
|
||||
result.mantissa = mantissa >> shift;
|
||||
result.implicitBit = 0;
|
||||
result.sign = sign;
|
||||
UIntType newMantissa = result.mantissa;
|
||||
if (shiftOutValue > halfwayValue) {
|
||||
newMantissa += 1;
|
||||
} else if (shiftOutValue == halfwayValue) {
|
||||
// Round to even.
|
||||
if (result.mantissa & 0x1)
|
||||
newMantissa += 1;
|
||||
}
|
||||
result.mantissa = newMantissa;
|
||||
// Adding 1 to mantissa can lead to overflow. This can only happen if
|
||||
// mantissa was all ones (0b111..11). For such a case, we will carry
|
||||
// the overflow into the exponent and set the implicit bit to 1.
|
||||
if (newMantissa == one) {
|
||||
result.exponent = 1;
|
||||
result.implicitBit = 1;
|
||||
} else {
|
||||
result.implicitBit = 0;
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
// TODO: Should zero with the correct sign be returned?
|
||||
return FPBits<long double>::buildNaN(1);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.exponent = biasedExponent;
|
||||
result.mantissa = mantissa;
|
||||
result.implicitBit = 1;
|
||||
result.sign = sign;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue