forked from OSchip/llvm-project
[mlir][sparse] Preserve NaNs when converting float to bfloat
This commit is contained in:
parent
90c9d41c8a
commit
b3127769b1
|
@ -12,6 +12,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/ExecutionEngine/Float16bits.h"
|
||||
#include <cmath>
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -106,6 +107,9 @@ const uint32_t kF32BfMantiBitDiff = 16;
|
|||
// Constructs the 16-bit representation for a bfloat value from a float value.
|
||||
// This implementation is adapted from Eigen.
|
||||
uint16_t float2bfloat(float floatValue) {
|
||||
if (std::isnan(floatValue))
|
||||
return std::signbit(floatValue) ? 0xFFC0 : 0x7FC0;
|
||||
|
||||
Float32Bits floatBits;
|
||||
floatBits.f = floatValue;
|
||||
uint16_t bfloatBits;
|
||||
|
|
Loading…
Reference in New Issue