!5210 resize int8 precision improve

Merge pull request !5210 from zhaozhenlong/lite/issue/resize_int8_weight_calc
2020-08-26 19:17:05 +08:00 · 2020-08-26 19:17:05 +08:00 · ab9641558f
parent 06b7b20658 54043d81c1
commit ab9641558f
4 changed files with 150 additions and 52 deletions
--- a/mindspore/lite/nnacl/int8/resize.c
+++ b/mindspore/lite/nnacl/int8/resize.c
@ -86,6 +86,62 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int
  return NNACL_OK;
 }

+// Bilinear resize of an int8 tensor using float interpolation weights.
+//
+// Shapes are read as [n, h, w, c]: batch and channel counts come from input_shape, the output height and
+// width from output_shape. For every output element the four neighbouring input samples have the input
+// zero point subtracted, are blended with float weights, requantized with mul_arg, shifted by the output
+// zero point and saturated to [INT8_MIN, INT8_MAX].
+//
+// Output rows are interleaved across threads: this call handles rows tid, tid + thread_num, ...
+// thread_num is the row stride of that loop, so it must be positive.
+//
+// Returns NNACL_NULL_PTR when any pointer argument is NULL, NNACL_OK otherwise.
+int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
+                                      const int *output_shape, const bool align_corners, QuantArg *quant_in,
+                                      QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num) {
+  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  // The quantization arguments are dereferenced for every output element, so reject NULL up front.
+  if (quant_in == NULL || quant_out == NULL || mul_arg == NULL) {
+    return NNACL_NULL_PTR;
+  }
+
+  int32_t in_n = input_shape[0];
+  int32_t in_h = input_shape[1];
+  int32_t in_w = input_shape[2];
+  int32_t in_c = input_shape[3];
+
+  int32_t new_height = output_shape[1];
+  int32_t new_width = output_shape[2];
+  float height_scale, width_scale;
+  ComputeScaleFloat(in_h, new_height, align_corners, &height_scale);
+  ComputeScaleFloat(in_w, new_width, align_corners, &width_scale);
+
+  int n, h, w, c;
+  for (n = 0; n < in_n; n++) {
+    for (h = tid; h < new_height; h += thread_num) {
+      // Source rows and their weights depend only on the output row, so compute them once per row.
+      float actual_y;
+      int bottom, top;
+      float bottom_weight, top_weight;
+      ComputeInterpolationArgsFloatWeight(h, height_scale, in_h, &actual_y, &bottom, &bottom_weight, &top, &top_weight);
+      for (w = 0; w < new_width; w++) {
+        float actual_x;
+        int left, right;
+        float left_weight, right_weight;
+        ComputeInterpolationArgsFloatWeight(w, width_scale, in_w, &actual_x, &left, &left_weight, &right,
+                                            &right_weight);
+        for (c = 0; c < in_c; c++) {
+          // Each corner contributes (sample - input zero point) scaled by its row and column weight.
+          float bottom_left_value = ((int32_t)input_data[offset(input_shape, n, bottom, left, c)] - quant_in->zp_) *
+                                    bottom_weight * left_weight;
+          float bottom_right_value = ((int32_t)input_data[offset(input_shape, n, bottom, right, c)] - quant_in->zp_) *
+                                     bottom_weight * right_weight;
+          float top_left_value =
+            ((int32_t)input_data[offset(input_shape, n, top, left, c)] - quant_in->zp_) * top_weight * left_weight;
+          float top_right_value =
+            ((int32_t)input_data[offset(input_shape, n, top, right, c)] - quant_in->zp_) * top_weight * right_weight;
+          float interp_value = bottom_left_value + bottom_right_value + top_left_value + top_right_value;
+
+          int32_t out_interp_value = MultiplyByQuantizedMultiplier((int32_t)interp_value, mul_arg->multiplier_,
+                                                                   mul_arg->left_shift_, mul_arg->right_shift_) +
+                                     quant_out->zp_;
+          // Saturate while the value is still 32-bit. Narrowing to int8_t before the lower-bound check would
+          // wrap anything below INT8_MIN back into range, so that check could never fire.
+          if (out_interp_value > INT8_MAX) {
+            out_interp_value = INT8_MAX;
+          } else if (out_interp_value < INT8_MIN) {
+            out_interp_value = INT8_MIN;
+          }
+          output_data[offset(output_shape, n, h, w, c)] = (int8_t)out_interp_value;
+        }
+      }
+    }
+  }
+  return NNACL_OK;
+}
+
 int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
                                    const int *output_shape, const bool align_corners, int tid, int thread_num) {
  int batch, y, x, c;
@ -133,6 +189,22 @@ void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int3
  *scaled_high_weight = *scaled_pos - (1 << 10) * (*low);
 }

+// Stores in *scale the input-to-output sampling ratio for one spatial dimension.
+// With align_corners set and more than one output element the ratio is (in_value - 1) / (out_value - 1);
+// in every other case it is in_value / out_value.
+void ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale) {
+  const bool use_corner_ratio = align_corners && out_value > 1;
+  if (use_corner_ratio) {
+    *scale = (float)(in_value - 1) / (out_value - 1);
+    return;
+  }
+  *scale = (float)in_value / out_value;
+}
+
+// Maps output index `pos` to a source position and the two source indices/weights used to interpolate it.
+//   *actual_pos  = pos * scale
+//   *low         = floor of the position (0 when the position is not positive)
+//   *high        = *low + 1, limited to size - 1
+//   *high_weight = position - *low, *low_weight = 1 - *high_weight
+void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
+                                         int32_t *low, float *low_weight, int32_t *high, float *high_weight) {
+  const float position = pos * scale;
+
+  int32_t lower = 0;
+  if (position > 0) {
+    lower = floor(position);
+  }
+
+  int32_t upper = lower + 1;
+  if (upper >= size) {
+    upper = size - 1;
+  }
+
+  // Distance from the lower sample; this is exactly the weight given to the upper sample.
+  const float fraction = position - lower;
+
+  *actual_pos = position;
+  *low = lower;
+  *low_weight = 1.0 - fraction;
+  *high = upper;
+  *high_weight = fraction;
+}
+
 void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
                               int32_t *nearest) {
  if (new_size == 0) {
--- a/mindspore/lite/nnacl/int8/resize.h
+++ b/mindspore/lite/nnacl/int8/resize.h
@ -31,6 +31,20 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int
                       const bool align_corners, QuantArg *quant_in, QuantArg *quant_out, const QuantMulArg *mul_arg,
                       int tid, int thread_num);

+int ResizeBilinearInt8WithFloatWeight(const int8_t *input_data, int8_t *output_data, const int *input_shape,
+                                      const int *output_shape, const bool align_corners, QuantArg *quant_in,
+                                      QuantArg *quant_out, const QuantMulArg *mul_arg, int tid, int thread_num);
+
+void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale);
+
+void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
+                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight);
+
+void ComputeScaleFloat(const int32_t in_value, const int32_t out_value, const bool align_corners, float *scale);
+
+void ComputeInterpolationArgsFloatWeight(const int32_t pos, const float scale, const int32_t size, float *actual_pos,
+                                         int32_t *low, float *low_weight, int32_t *high, float *high_weight);
+
 int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
                                    const int *output_shape, const bool align_corners, int tid, int thread_num);

@ -38,11 +52,6 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con
                              const int *output_shape, const bool align_corners, const QuantMulArg *multiplier,
                              QuantArg *quant_in, QuantArg *quant_out, int tid, int thread_num);

-void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale);
-
-void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int32_t size, int32_t *scaled_pos,
-                              int32_t *low, int32_t *scaled_low_weight, int32_t *high, int32_t *scaled_high_weight);
-
 void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
                               int32_t *nearest);
 #ifdef __cplusplus
--- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
@ -35,9 +35,9 @@ int ResizeInt8CPUKernel::Init() {
  if (ret != RET_OK) {
    return ret;
  }
-  quant_in_ = new(std::nothrow) QuantArg;
+  quant_in_ = new (std::nothrow) QuantArg;
  MS_ASSERT(quant_in_);
-  quant_out_ = new(std::nothrow) QuantArg;
+  quant_out_ = new (std::nothrow) QuantArg;
  MS_ASSERT(quant_out_);
  auto input = in_tensors_.at(0);
  quant_in_->zp_ = input->GetQuantParams().front().zeroPoint;
@ -46,7 +46,7 @@ int ResizeInt8CPUKernel::Init() {
  quant_out_->zp_ = output->GetQuantParams().front().zeroPoint;
  quant_out_->scale_ = output->GetQuantParams().front().scale;

-  multiplier_ = new(std::nothrow) QuantMulArg;
+  multiplier_ = new (std::nothrow) QuantMulArg;
  MS_ASSERT(multiplier_);
  QuantizeRoundParameter(quant_in_->scale_ / quant_out_->scale_, &multiplier_->multiplier_, &multiplier_->left_shift_,
                         &multiplier_->right_shift_);
@ -85,9 +85,14 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
  int ret = 0;
  switch (method_) {
    case static_cast<int>(schema::ResizeMethod_BILINEAR): {
-      ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
-                               align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_);
-
+      if (quant_in_->zp_ == 0) {
+        ret = ResizeBilinearInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
+                                 align_corners_, quant_in_, quant_out_, multiplier_, task_id, context_->thread_num_);
+      } else {
+        ret = ResizeBilinearInt8WithFloatWeight(input_data, output_data, input_shape.data(),
+                                                out_tensors_[0]->shape().data(), align_corners_, quant_in_, quant_out_,
+                                                multiplier_, task_id, context_->thread_num_);
+      }
      break;
    }
    case static_cast<int>(schema::ResizeMethod_NEAREST_NEIGHBOR): {
@ -95,25 +100,12 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
      bool same_scale = abs(quant_out_->scale_ - quant_in_->scale_) < 1e-6;
      if (same_zp && same_scale) {
        ret =
-            ResizeNearestNeighborInt8Simple(input_data,
-                                            output_data,
-                                            input_shape.data(),
-                                            out_tensors_[0]->shape().data(),
-                                            align_corners_,
-                                            task_id,
-                                            context_->thread_num_);
+          ResizeNearestNeighborInt8Simple(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
+                                          align_corners_, task_id, context_->thread_num_);
      } else {
        ret =
-            ResizeNearestNeighborInt8(input_data,
-                                      output_data,
-                                      input_shape.data(),
-                                      out_tensors_[0]->shape().data(),
-                                      align_corners_,
-                                      multiplier_,
-                                      quant_in_,
-                                      quant_out_,
-                                      task_id,
-                                      context_->thread_num_);
+          ResizeNearestNeighborInt8(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
+                                    align_corners_, multiplier_, quant_in_, quant_out_, task_id, context_->thread_num_);
      }
      break;
    }
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/resize_bilinear_int8_tests.cc
@ -84,14 +84,13 @@ TEST_F(TestResizeBilinearInt8, Bilinear0) {
  int8_t output_data[16] = {0};
  std::vector<int> in_shape = {1, 2, 2, 1};
  std::vector<int> out_shape = {1, 4, 4, 1};
-  const lite::tensor::QuantArg quant_in = {0.005f, 2};
-  const lite::tensor::QuantArg quant_out = {0.008f, 5};
+  const lite::tensor::QuantArg quant_in = {0.005f, 0};
+  const lite::tensor::QuantArg quant_out = {0.008f, 0};
  bool align_corners = false;
  int thread_num = 1;
-  int8_t expect[16] = {4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 5, 5, 6, 6};
+  int8_t expect[16] = {0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
-  kernel_->Init();
  kernel_->Run();

  CompareOutputInt8(output_data, expect, 16, err_percent_);
@ -104,20 +103,19 @@ TEST_F(TestResizeBilinearInt8, Bilinear1) {
  int8_t input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
  int8_t output_data[160] = {0};
-  const lite::tensor::QuantArg quant_in = {0.005f, 2};
-  const lite::tensor::QuantArg quant_out = {0.008f, 5};
+  const lite::tensor::QuantArg quant_in = {0.005f, 0};
+  const lite::tensor::QuantArg quant_out = {0.008f, 0};
  int thread_num = 1;
  bool align_corners = false;
-  int8_t expect[160] = {4,  4,  5,  6,  6,  5,  6,  7,  7,  8,  7,  8,  8,  9,  9,  7,  8,  8,  9,  9,  7,  8,  8,
-                        9,  9,  8,  9,  10, 10, 11, 10, 11, 11, 12, 13, 10, 11, 11, 12, 13, 10, 11, 11, 12, 13, 12,
-                        12, 13, 13, 14, 13, 14, 14, 15, 16, 13, 14, 14, 15, 16, 10, 11, 11, 12, 13, 12, 12, 13, 13,
-                        14, 13, 14, 14, 15, 16, 13, 14, 14, 15, 16, 16, 17, 18, 18, 19, 18, 18, 19, 20, 20, 19, 20,
-                        21, 21, 22, 19, 20, 21, 21, 22, 19, 20, 21, 21, 22, 21, 22, 22, 23, 23, 23, 23, 24, 24, 25,
-                        23, 23, 24, 24, 25, 23, 23, 24, 24, 25, 24, 25, 25, 26, 27, 26, 26, 27, 28, 28, 26, 26, 27,
-                        28, 28, 23, 23, 24, 24, 25, 24, 25, 25, 26, 27, 26, 26, 27, 28, 28, 26, 26, 27, 28, 28};
+  int8_t expect[160] = {0,  1,  1,  2,  2,  2,  2,  3,  3,  4,  3,  4,  4,  5,  6,  3,  4,  4,  5,  6,  3,  4,  4,
+                        5,  6,  5,  5,  6,  7,  7,  6,  7,  8,  8,  9,  6,  7,  8,  8,  9,  6,  7,  7,  8,  9,  8,
+                        8,  9,  10, 10, 9,  10, 11, 11, 12, 9,  10, 11, 11, 12, 6,  7,  7,  8,  9,  8,  8,  9,  10,
+                        10, 9,  10, 11, 11, 12, 9,  10, 11, 11, 12, 13, 13, 14, 14, 15, 14, 15, 15, 16, 17, 16, 16,
+                        17, 18, 18, 16, 16, 17, 18, 18, 16, 16, 17, 18, 18, 17, 18, 18, 19, 20, 19, 19, 20, 21, 21,
+                        19, 19, 20, 21, 21, 19, 19, 20, 21, 21, 20, 21, 22, 22, 23, 22, 23, 23, 24, 24, 22, 23, 23,
+                        24, 24, 19, 19, 20, 21, 21, 20, 21, 22, 22, 23, 22, 23, 23, 24, 24, 22, 23, 23, 24, 24};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
-  kernel_->Init();
  kernel_->Run();

  CompareOutputInt8(output_data, expect, 160, err_percent_);
@ -131,22 +129,49 @@ TEST_F(TestResizeBilinearInt8, Bilinear2) {
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
  int8_t output_data[160] = {0};

-  const lite::tensor::QuantArg quant_in = {0.005f, 2};
-  const lite::tensor::QuantArg quant_out = {0.008f, 5};
+  const lite::tensor::QuantArg quant_in = {0.005f, 0};
+  const lite::tensor::QuantArg quant_out = {0.008f, 0};
  int thread_num = 2;
  bool align_corners = true;
-  int8_t expect[160] = {4,  4,  5,  6,  6,  5,  5,  6,  7,  7,  6,  6,  7,  8,  8,  7,  8,  8,  9,  9,  6,  6,  7,
-                        8,  8,  7,  8,  8,  9,  9,  8,  9,  9,  10, 10, 9,  10, 10, 11, 11, 8,  9,  9,  10, 10, 9,
-                        10, 10, 11, 11, 10, 11, 11, 12, 13, 11, 12, 12, 13, 14, 10, 11, 11, 12, 13, 11, 12, 12, 13,
-                        14, 12, 13, 13, 14, 15, 13, 14, 14, 15, 16, 16, 17, 18, 18, 19, 17, 18, 19, 19, 20, 18, 19,
-                        20, 20, 21, 19, 20, 21, 21, 22, 18, 19, 20, 20, 21, 19, 20, 21, 21, 22, 20, 21, 22, 22, 23,
-                        21, 22, 23, 23, 24, 20, 21, 22, 22, 23, 21, 22, 23, 23, 24, 23, 23, 24, 24, 25, 24, 24, 25,
-                        25, 26, 23, 23, 24, 24, 25, 24, 24, 25, 25, 26, 25, 25, 26, 26, 27, 26, 26, 27, 28, 28};
+  int8_t expect[160] = {0,  1,  1,  2,  2,  1,  2,  2,  3,  4,  2,  3,  3,  4,  5,  3,  4,  4,  5,  6,  2,  3,  3,
+                        4,  5,  3,  4,  4,  5,  6,  4,  5,  5,  6,  7,  5,  6,  6,  7,  8,  4,  5,  5,  6,  7,  5,
+                        6,  6,  7,  8,  6,  7,  8,  8,  9,  7,  8,  9,  9,  10, 6,  7,  7,  8,  9,  7,  8,  9,  9,
+                        10, 8,  9,  10, 10, 11, 9,  10, 11, 11, 12, 13, 13, 14, 14, 15, 14, 14, 15, 15, 16, 15, 15,
+                        16, 16, 17, 16, 16, 17, 18, 18, 15, 15, 16, 16, 17, 16, 16, 17, 18, 18, 17, 17, 18, 19, 19,
+                        18, 18, 19, 20, 20, 17, 17, 18, 19, 19, 18, 18, 19, 20, 20, 19, 19, 20, 21, 21, 20, 20, 21,
+                        22, 22, 19, 19, 20, 21, 21, 20, 20, 21, 22, 22, 21, 21, 22, 23, 23, 22, 23, 23, 24, 24};

  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
-  kernel_->Init();
  kernel_->Run();

  CompareOutputInt8(output_data, expect, 160, err_percent_);
 }
+
+// 2*2*2*5 -> 2*4*4*5 thread num 2, align corners, input/output zp 2 (non-zero), identical in/out scale
+TEST_F(TestResizeBilinearInt8, Bilinear3) {
+  std::vector<int> in_shape = {2, 2, 2, 5};
+  std::vector<int> out_shape = {2, 4, 4, 5};
+  int8_t input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39};
+  int8_t output_data[160] = {0};
+
+  const lite::tensor::QuantArg quant_in = {0.005f, 2};  // non-zero zp: presumably selects the float-weight path
+  const lite::tensor::QuantArg quant_out = {0.005f, 2};
+  int thread_num = 2;
+  bool align_corners = true;
+  int8_t expect[160] = {0,  1,  2,  3,  4,  2,  3,  4,  5,  6,  3,  4,  5,  6,  7,  5,  6,  7,  8,  9,  3,  4,  5,
+                        6,  7,  5,  6,  7,  8,  9,  7,  8,  9,  10, 11, 8,  9,  10, 11, 12, 7,  8,  9,  10, 11, 8,
+                        9,  10, 11, 12, 10, 11, 12, 13, 14, 12, 13, 14, 15, 16, 10, 11, 12, 13, 14, 12, 13, 14, 15,
+                        16, 13, 14, 15, 16, 17, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 22, 23, 24, 25, 26, 23, 24,
+                        25, 26, 27, 25, 26, 27, 28, 29, 23, 24, 25, 26, 27, 25, 26, 27, 28, 29, 27, 28, 29, 30, 31,
+                        28, 29, 30, 31, 32, 27, 28, 29, 30, 31, 28, 29, 30, 31, 32, 30, 31, 32, 33, 34, 32, 33, 34,
+                        35, 36, 30, 31, 32, 33, 34, 32, 33, 34, 35, 36, 33, 34, 35, 36, 37, 35, 36, 37, 38, 39};
+
+  Prepare(in_shape, out_shape, input_data, output_data, quant_in, quant_out, align_corners, thread_num);
+  kernel_->Run();
+
+  err_percent_ = 0.325f;  // tolerance handed to CompareOutputInt8 below
+  CompareOutputInt8(output_data, expect, 160, err_percent_);
+}
+
 }  // namespace mindspore