[mlir][sparse] Preserve NaNs when converting float to bfloat

2022-06-21 15:22:35 +02:00 · 2022-06-21 15:22:35 +02:00 · b3127769b1
parent 90c9d41c8a
commit b3127769b1
1 changed file with 4 additions and 0 deletions
--- a/mlir/lib/ExecutionEngine/Float16bits.cpp
+++ b/mlir/lib/ExecutionEngine/Float16bits.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//

 #include "mlir/ExecutionEngine/Float16bits.h"
+#include <cmath>

 namespace {

@@ -106,6 +107,9 @@ const uint32_t kF32BfMantiBitDiff = 16;
 // Constructs the 16 bit representation for a bfloat value from a float value.
 // This implementation is adapted from Eigen.
 uint16_t float2bfloat(float floatValue) {
+  if (std::isnan(floatValue))
+    return std::signbit(floatValue) ? 0xFFC0 : 0x7FC0;
+
  Float32Bits floatBits;
  floatBits.f = floatValue;
  uint16_t bfloatBits;