forked from mindspore-Ecosystem/mindspore
!24595 [MSLITE] fix fp16 bugs for diverse networks compatibility in tensorrt delegate jfr_yolo
Merge pull request !24595 from Liu_Xuu/trt_1009_concate16
commit c8ffcaa1dd
@@ -18,6 +18,7 @@
 #include <math.h>
 #include <limits.h>
 #include "nnacl/infer/infer_register.h"
+#include "nnacl/nnacl_common.h"

 int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) {
   const TensorC *input = inputs[0];
@@ -48,6 +49,19 @@ int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) {
     MS_CHECK_INT_MUL_NOT_OVERFLOW((int)(data[2]), GetWidth(input), NNACL_ERRCODE_MUL_OVERFLOW);
     param->new_height_ = round(data[1] * GetHeight(input));
     param->new_width_ = round(data[2] * GetWidth(input));
+  } else if (shape_tensor->data_type_ == kNumberTypeFloat16) {
+    uint16_t *data = (uint16_t *)(shape_tensor->data_);
+    if (data == NULL) {
+      return NNACL_INFER_INVALID;
+    }
+
+    float scale_height = ShortToFloat32(data[1]);
+    float scale_width = ShortToFloat32(data[2]);
+
+    MS_CHECK_INT_MUL_NOT_OVERFLOW(scale_height, GetHeight(input), NNACL_ERRCODE_MUL_OVERFLOW);
+    MS_CHECK_INT_MUL_NOT_OVERFLOW(scale_width, GetWidth(input), NNACL_ERRCODE_MUL_OVERFLOW);
+    param->new_height_ = round(scale_height * GetHeight(input));
+    param->new_width_ = round(scale_width * GetWidth(input));
   }
   break;
 }
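The added else-if mirrors the float32 path above it: when the resize scale tensor arrives as fp16, each raw uint16_t word is decoded with ShortToFloat32 before the height and width scales are applied. A minimal sketch of that data flow (hypothetical values; the forward declaration is copied from the header change later in this diff, so the snippet links against the converter the diff introduces):

#include <cmath>
#include <cstdint>
#include <cstdio>

float ShortToFloat32(uint16_t src_value);  // declaration copied from the header change below

int main() {
  // fp16 bit patterns for {1.0, 2.0, 2.0, 1.0}; the infer code reads index 1
  // as the height scale and index 2 as the width scale.
  uint16_t scales[4] = {0x3C00, 0x4000, 0x4000, 0x3C00};
  int in_h = 320, in_w = 640;
  int new_h = static_cast<int>(std::round(ShortToFloat32(scales[1]) * in_h));  // 2.0 * 320 = 640
  int new_w = static_cast<int>(std::round(ShortToFloat32(scales[2]) * in_w));  // 2.0 * 640 = 1280
  std::printf("new_h=%d new_w=%d\n", new_h, new_w);
  return 0;
}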
@@ -114,7 +114,7 @@ int ResizeTensorRT::SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1:
     case DataType::kNumberTypeFloat16: {
       const uint16_t *shape_data_fp16 = static_cast<const uint16_t *>(shape_data);
       for (int i = 0; i < in_tensors_[1].ElementNum(); i++) {
-        out_shape.push_back(static_cast<float>(*(shape_data_fp16 + i)));
+        out_shape.push_back(ShortToFloat32(*(shape_data_fp16 + i)));
       }
       break;
     }
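This one-line change is the core of the fp16 fix: shape_data points at raw IEEE 754 half-precision bit patterns, and static_cast<float> on a uint16_t converts the integer value of those bits, not the number they encode. A small illustration (hypothetical values, assuming the ShortToFloat32 added later in this diff):

#include <cstdint>

float ShortToFloat32(uint16_t src_value);  // the converter this diff adds

int main() {
  uint16_t half_one = 0x3C00;                  // IEEE 754 fp16 bit pattern for 1.0
  float wrong = static_cast<float>(half_one);  // 15360.0f: the bits read as an integer
  float right = ShortToFloat32(half_one);      // 1.0f: the bits decoded as fp16
  return wrong == right ? 0 : 1;               // the two differ by four orders of magnitude
}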
@@ -388,4 +388,23 @@ std::string GetTensorFormat(nvinfer1::ITensor *trt_tensor, mindspore::Format format
   out_string += dim_string;
   return out_string;
 }
+float ShortToFloat32(uint16_t src_value) {
+  const float32_bits magic = {113 << 23};
+  const unsigned int shifted_exp = 0x7c00 << 13;
+  float32_bits o;
+
+  o.u = (src_value & 0x7fff) << 13;
+  unsigned int exp = shifted_exp & o.u;
+  o.u += (127 - 15) << 23;
+
+  if (exp == shifted_exp) {
+    o.u += (128 - 16) << 23;
+  } else if (exp == 0) {
+    o.u += 1 << 23;
+    o.f -= magic.f;
+  }
+
+  o.u |= (src_value & 0x8000) << 16;
+  return o.f;
+}
 } // namespace mindspore::lite
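ShortToFloat32 is the standard bit-shift half-to-float conversion: shifting left by 13 aligns the 10-bit fp16 mantissa with the 23-bit fp32 mantissa, adding (127 - 15) << 23 rebiases the exponent, the exp == shifted_exp branch pushes Inf/NaN exponents to all ones, and the exp == 0 branch renormalizes subnormals by subtracting the magic constant 113 << 23. A standalone sanity check against known fp16 encodings (a hypothetical test file; float32_bits and ShortToFloat32 are copied from the hunk above, with comments added):

#include <cassert>
#include <cmath>
#include <cstdint>

typedef union float32_bits {
  unsigned int u;
  float f;
} float32_bits;

float ShortToFloat32(uint16_t src_value) {
  const float32_bits magic = {113 << 23};
  const unsigned int shifted_exp = 0x7c00 << 13;  // fp16 exponent mask, fp32-aligned
  float32_bits o;

  o.u = (src_value & 0x7fff) << 13;   // exponent and mantissa into fp32 position
  unsigned int exp = shifted_exp & o.u;
  o.u += (127 - 15) << 23;            // rebias exponent from fp16 to fp32

  if (exp == shifted_exp) {
    o.u += (128 - 16) << 23;          // Inf/NaN: exponent becomes all ones
  } else if (exp == 0) {
    o.u += 1 << 23;                   // subnormal: renormalize
    o.f -= magic.f;
  }

  o.u |= (src_value & 0x8000) << 16;  // reattach the sign bit
  return o.f;
}

int main() {
  assert(ShortToFloat32(0x3C00) == 1.0f);                // fp16 1.0
  assert(ShortToFloat32(0xC000) == -2.0f);               // fp16 -2.0
  assert(ShortToFloat32(0x3800) == 0.5f);                // fp16 0.5
  assert(std::isinf(ShortToFloat32(0x7C00)));            // fp16 +Inf
  assert(ShortToFloat32(0x0001) == 1.0f / 16777216.0f);  // smallest subnormal, 2^-24
  return 0;
}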
@@ -37,6 +37,11 @@ struct ActivationParams {
   float beta;
 };

+typedef union float32_bits {
+  unsigned int u;
+  float f;
+} float32_bits;
+
 // Convert Tensor data to Cuda dims.
 nvinfer1::Dims ConvertCudaDims(const void *data, int64_t size);
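The float32_bits union is what lets the converter treat the same 32 bits as either an integer pattern (u) or a float (f). Strictly, reading the member that was not last written is well defined in C but undefined behavior in ISO C++; mainstream compilers support it nonetheless, which the delegate relies on here. A tiny illustration (not from the PR; assumes 32-bit unsigned int and IEEE 754 floats):

#include <cassert>

typedef union float32_bits {
  unsigned int u;  // the 32 bits viewed as an integer
  float f;         // the same 32 bits viewed as an IEEE 754 single
} float32_bits;

int main() {
  float32_bits b;
  b.u = 0x3F800000u;    // bit pattern of 1.0f: sign 0, exponent 127, mantissa 0
  assert(b.f == 1.0f);  // the write through .u is observable through .f
  return 0;
}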
@@ -79,6 +84,8 @@ void PackNHWCToNCHWFp16(const void *src, void *dst, size_t batch, size_t plane,
 std::string GetTensorFormat(nvinfer1::ITensor *trt_tensor, mindspore::Format format);

+float ShortToFloat32(uint16_t src_value);
+
 template <typename T1, typename T2>
 bool SameDims(const std::vector<T1> &shape1, const std::vector<T2> &shape2) {
   if (shape1.size() != shape2.size()) {