From cd1596b05a9705303eb1d05c59117a42148c413d Mon Sep 17 00:00:00 2001
From: ling <lingqiaomin.huawei.com>
Date: Fri, 25 Sep 2020 17:18:19 +0800
Subject: [PATCH] [MSLITE] deconv int8

---
 mindspore/lite/nnacl/fp16/activation_fp16.c   | 12 ++++
 mindspore/lite/nnacl/fp16/activation_fp16.h   |  1 +
 .../lite/nnacl/fp16/deconv_winograd_fp16.c    | 29 ++++++----
 mindspore/lite/nnacl/fp32/deconv_winograd.c   | 20 +++----
 mindspore/lite/nnacl/int8/common_func_int8.c  |  8 ---
 mindspore/lite/nnacl/int8/deconv_int8.c       | 58 ++-----------------
 .../kernel/arm/fp16/activation_fp16.cc        | 14 ++++-
 .../arm/fp16/deconvolution_winograd_fp16.cc   | 50 ++++++++--------
 .../src/runtime/kernel/arm/fp32/activation.cc | 12 +++-
 .../kernel/arm/int8/deconvolution_int8.cc     |  6 +-
 .../kernel/arm/int8/deconv_int8_tests.cc      | 35 -----------
 11 files changed, 99 insertions(+), 146 deletions(-)

diff --git a/mindspore/lite/nnacl/fp16/activation_fp16.c b/mindspore/lite/nnacl/fp16/activation_fp16.c
index 33060ed46fd..6f9b878fc3a 100644
--- a/mindspore/lite/nnacl/fp16/activation_fp16.c
+++ b/mindspore/lite/nnacl/fp16/activation_fp16.c
@@ -94,3 +94,15 @@ int HSwishFp16(const float16_t *src, float16_t *dst, int ele_num) {
   }
   return NNACL_OK;
 }
+
+int SwishFp16(const float16_t *src, float16_t *dst, int ele_num) {  // dst[i] = src[i] * (SigmoidFp16 result)[i]
+  int ret = SigmoidFp16(src, dst, ele_num);  // first pass: SigmoidFp16 is expected to fill dst from src
+  if (ret != NNACL_OK) {
+    return NNACL_ERR;  // any non-OK code from SigmoidFp16 is reported as NNACL_ERR
+  }
+  int index = 0;
+  for (; index < ele_num; index++) {
+    dst[index] = src[index] * dst[index];  // NOTE(review): src is re-read here; presumably must not alias dst
+  }
+  return NNACL_OK;
+}
diff --git a/mindspore/lite/nnacl/fp16/activation_fp16.h b/mindspore/lite/nnacl/fp16/activation_fp16.h
index eea4b489f8f..d1ed088c4df 100644
--- a/mindspore/lite/nnacl/fp16/activation_fp16.h
+++ b/mindspore/lite/nnacl/fp16/activation_fp16.h
@@ -38,6 +38,7 @@ int LReluFp16(const float16_t *src, float16_t *dst, int ele_num, float16_t alpha
 int SigmoidFp16(const float16_t *src, float16_t *dst, int ele_num);
 int TanhFp16(const float16_t *src, float16_t *dst, int ele_num);
 int HSwishFp16(const float16_t *src, float16_t *dst, int ele_num);
+int SwishFp16(const float16_t *src, float16_t *dst, int ele_num);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/nnacl/fp16/deconv_winograd_fp16.c b/mindspore/lite/nnacl/fp16/deconv_winograd_fp16.c
index bf87204f755..64dd317d4d9 100644
--- a/mindspore/lite/nnacl/fp16/deconv_winograd_fp16.c
+++ b/mindspore/lite/nnacl/fp16/deconv_winograd_fp16.c
@@ -110,10 +110,10 @@ void DeConvWgMergeFp16(const float16_t *src, float16_t *dst, size_t src_stride,
   return;
 }
 
-void _deConvWinogradFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight_buf, float16_t *tmp_buf,
-                         float16_t *at_buf, float16_t *a_mid_buf, float16_t *trans_a_buf, bool *transfered,
-                         float16_t *bt_buf, float16_t *b_tmp_buf, int unit_size, int w_start, int h_start,
-                         ConvParameter *conv_param, DeConvParam *deconv_param) {
+void DeConvWgCalWgFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight_buf, float16_t *tmp_buf,
+                       float16_t *at_buf, float16_t *a_mid_buf, float16_t *trans_a_buf, bool *transfered,
+                       float16_t *bt_buf, float16_t *b_tmp_buf, int unit_size, int w_start, int h_start,
+                       ConvParameter *conv_param, DeConvParam *deconv_param) {
   int winograd_plane = unit_size * unit_size;
   if (!transfered[unit_size]) {
     WinogradTransLeftFp16(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size,
@@ -151,8 +151,8 @@ void _deConvWinogradFp16(float16_t *tile_in, float16_t *tile_out, float16_t *wei
   return;
 }
 
-void _deConvCommonFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight, float16_t *tmp_buf, int h_start,
-                       int w_start, int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
+void DeConvWgCalCommFp16(float16_t *tile_in, float16_t *tile_out, float16_t *weight, float16_t *tmp_buf, int h_start,
+                         int w_start, int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
   int count = deconv_param->oc_div4_ * w_size * h_size;
   int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
   int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_;
@@ -218,6 +218,7 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa
     unit->winograd_.AT_ = malloc(unit->winograd_.i_ * unit->winograd_.o_ * sizeof(float16_t));
     if (unit->winograd_.AT_ == NULL) {
       free(current_unit_weight);
+      current_unit_weight = NULL;
       return NNACL_NULL_PTR;
     }
     Float32ToFloat16(matrix_at, unit->winograd_.AT_, unit->winograd_.i_ * unit->winograd_.o_);
@@ -227,6 +228,8 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa
     if (unit->winograd_.BT_ == NULL) {
       free(current_unit_weight);
       free(unit->winograd_.AT_);
+      current_unit_weight = NULL;
+      unit->winograd_.AT_ = NULL;
       return NNACL_NULL_PTR;
     }
     Float32ToFloat16(matrix_bt, unit->winograd_.BT_, unit->winograd_.o_ * unit->winograd_.o_);
@@ -238,6 +241,9 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa
       free(current_unit_weight);
       free(unit->winograd_.AT_);
       free(unit->winograd_.BT_);
+      current_unit_weight = NULL;
+      unit->winograd_.AT_ = NULL;
+      unit->winograd_.BT_ = NULL;
       return NNACL_NULL_PTR;
     }
 
@@ -268,6 +274,7 @@ int PackDeConvWgDataFp16(float16_t *nhwc_weight, DeConvComputeUnit *unit, ConvPa
   }
 
   free(current_unit_weight);
+  current_unit_weight = NULL;
   return NNACL_OK;
 }
 
@@ -320,14 +327,14 @@ void DeconvWgFp16(float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_ou
       float16_t *tmp_b = (float16_t *)unit->winograd_.b_buffer_ + task_id * unit->winograd_.kh_ * unit->winograd_.kw_ *
                                                                     DECONV_WINOGRAD_DEFAULT_TILE *
                                                                     deconv_param->oc_up4_;
-      _deConvWinogradFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->winograd_.AT_, mid_a, dst_a,
-                          transfered, unit->winograd_.BT_, tmp_b, unit->winograd_.kh_, unit->w_start_, unit->h_start_,
-                          conv_param, deconv_param);
+      DeConvWgCalWgFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->winograd_.AT_, mid_a, dst_a,
+                        transfered, unit->winograd_.BT_, tmp_b, unit->winograd_.kh_, unit->w_start_, unit->h_start_,
+                        conv_param, deconv_param);
     } else {
       float16_t *tmp_buf = (float16_t *)unit->tmp_buffer_ + task_id * deconv_param->oc_div4_ * unit->w_size_ *
                                                               unit->h_size_ * DECONV_WINOGRAD_DEFAULT_TILE * C4NUM;
-      _deConvCommonFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_,
-                        unit->h_size_, unit->w_size_, conv_param, deconv_param);
+      DeConvWgCalCommFp16(tile_in, tile_out, (float16_t *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_,
+                          unit->h_size_, unit->w_size_, conv_param, deconv_param);
     }
   }
   return;
diff --git a/mindspore/lite/nnacl/fp32/deconv_winograd.c b/mindspore/lite/nnacl/fp32/deconv_winograd.c
index 8467c58fd99..1006ad10c9d 100644
--- a/mindspore/lite/nnacl/fp32/deconv_winograd.c
+++ b/mindspore/lite/nnacl/fp32/deconv_winograd.c
@@ -340,9 +340,9 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
   return;
 }
 
-void _deConvWinograd(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, const float *at_buf,
-                     float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf,
-                     int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) {
+void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, float *at_buf,
+                       float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf,
+                       int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) {
   int winograd_plane = unit_size * unit_size;
   if (!transfered[unit_size]) {
     WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT,
@@ -380,8 +380,8 @@ void _deConvWinograd(const float *tile_in, float *tile_out, float *weight_buf, f
   return;
 }
 
-void _deConvCommon(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start,
-                   int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
+void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start,
+                         int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
   int count = deconv_param->oc_div4_ * w_size * h_size;
   int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
   int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_;
@@ -461,14 +461,14 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind
                                                               DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
       float *tmp_b_buf = (float *)unit->winograd_.b_buffer_ + task_id * unit->winograd_.kh_ * unit->winograd_.kw_ *
                                                                 deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
-      _deConvWinograd(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->winograd_.AT_, wg_mid_a_buf,
-                      wg_dst_a_buf, transfered, unit->winograd_.BT_, tmp_b_buf, unit->winograd_.kh_, unit->w_start_,
-                      unit->h_start_, conv_param, deconv_param);
+      DeConvWgCalWgFp32(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->winograd_.AT_, wg_mid_a_buf,
+                        wg_dst_a_buf, transfered, unit->winograd_.BT_, tmp_b_buf, unit->winograd_.kh_, unit->w_start_,
+                        unit->h_start_, conv_param, deconv_param);
     } else {
       float *tmp_buf = (float *)unit->tmp_buffer_ + task_id * deconv_param->oc_div4_ * unit->w_size_ * unit->h_size_ *
                                                       DECONV_WINOGRAD_DEFAULT_TILE * C4NUM;
-      _deConvCommon(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_, unit->h_size_,
-                    unit->w_size_, conv_param, deconv_param);
+      DeConvWgCalCommFp32(tile_in, tile_out, (float *)unit->weight_, tmp_buf, unit->h_start_, unit->w_start_,
+                          unit->h_size_, unit->w_size_, conv_param, deconv_param);
     }
   }
   return;
diff --git a/mindspore/lite/nnacl/int8/common_func_int8.c b/mindspore/lite/nnacl/int8/common_func_int8.c
index fce0e50b3d3..dfe7149fff4 100644
--- a/mindspore/lite/nnacl/int8/common_func_int8.c
+++ b/mindspore/lite/nnacl/int8/common_func_int8.c
@@ -41,14 +41,6 @@ void PostConvFuncCommInt8(const int32_t *in, int8_t *out, const int32_t *bias, s
   return;
 }
 
-void PostFuncInt8C8(const int32_t *in, const int32_t *bias, int8_t *out, size_t oc, size_t plane, int32_t multiplier,
-                    int32_t left_shift, int32_t right_shift, int32_t zp, int32_t mini, int32_t maxi) {
-  /*  ((int32_t)row8x8-major + bias) * multiplier + output_zp  =>  (int8)relu  =>  (int8_t)row-major  */
-  PostConvFuncCommInt8(in, out, bias, oc, plane, oc, UP_ROUND(plane, C8NUM) * C8NUM, multiplier, mini, maxi, left_shift,
-                       right_shift, zp, C8NUM);
-  return;
-}
-
 void PostFuncInt8C4(const int32_t *in, const int32_t *bias, int8_t *out, size_t oc, size_t plane, size_t stride,
                     int32_t multiplier, int32_t left_shift, int32_t right_shift, int32_t zp, int32_t mini,
                     int32_t maxi) {
diff --git a/mindspore/lite/nnacl/int8/deconv_int8.c b/mindspore/lite/nnacl/int8/deconv_int8.c
index 9c6fba7841d..09aebaa3d75 100644
--- a/mindspore/lite/nnacl/int8/deconv_int8.c
+++ b/mindspore/lite/nnacl/int8/deconv_int8.c
@@ -17,52 +17,6 @@
 #include "nnacl/int8/deconv_int8.h"
 #include "nnacl/int8/matmul_int8.h"
 #include "nnacl/int8/common_func_int8.h"
-int DeConvPostInt8C8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
-                     ConvParameter *conv_param) {
-  /* row8x8-major(ih*iw x oc*kh*kw)  ->  row8-major(oh*ow x oc) */
-  size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
-  size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
-  size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
-  int oc8 = UP_DIV(output_channel, C8NUM);
-  int in_plane8 = UP_ROUND(input_plane, 8);
-
-  for (int c = 0; c < oc8; c++) {
-    int32_t *dst_ptr = tmp + c * output_plane * C8NUM;
-    const int32_t *src_ptr = src + c * in_plane8 * kernel_plane * C8NUM;
-    memset(dst_ptr, 0, output_plane * C8NUM * sizeof(int32_t));
-
-    for (int ih = 0; ih < conv_param->input_h_; ih++) {
-      for (int iw = 0; iw < conv_param->input_w_; iw++) {
-        int oh = ih * conv_param->stride_h_ - conv_param->pad_u_;
-        int ow = iw * conv_param->stride_w_ - conv_param->pad_l_;
-
-        int kh_start = MSMAX(0, UP_DIV(-oh, conv_param->dilation_h_));
-        int kh_end = MSMIN(conv_param->kernel_h_, UP_DIV(conv_param->output_h_ - oh, conv_param->dilation_h_));
-        int kw_start = MSMAX(0, UP_DIV(-ow, conv_param->dilation_w_));
-        int kw_end = MSMIN(conv_param->kernel_w_, UP_DIV(conv_param->output_w_ - ow, conv_param->dilation_w_));
-        for (int kh = kh_start; kh < kh_end; kh++) {
-          for (int kw = kw_start; kw < kw_end; kw++) {
-            int src_index = ih * conv_param->input_w_ * C8NUM + iw * C8NUM +
-                            kh * input_plane * conv_param->kernel_w_ * C8NUM + kw * input_plane * C8NUM;
-            int dst_index = oh * conv_param->output_w_ * C8NUM + ow * C8NUM +
-                            kh * conv_param->dilation_h_ * conv_param->output_w_ * C8NUM +
-                            kw * conv_param->dilation_w_ * C8NUM;
-            for (int i = 0; i < C8NUM; i++) {
-              dst_ptr[dst_index + i] += src_ptr[src_index + i];
-            }
-          } /*kw*/
-        }   /*kh*/
-      }     /*iw*/
-    }       /*ih*/
-  }         /*oc8*/
-
-  PostFuncInt8C8(tmp, bias, out, output_channel, output_plane, conv_param->conv_quant_arg_.quant_multiplier_[0],
-                 conv_param->conv_quant_arg_.left_shift_[0], conv_param->conv_quant_arg_.right_shift_[0],
-                 conv_param->conv_quant_arg_.output_quant_args_[0].zp_, conv_param->conv_quant_arg_.out_act_min_[0],
-                 conv_param->conv_quant_arg_.out_act_max_[0]);
-  return NNACL_OK;
-}
-
 int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
                      ConvParameter *conv_param) {
   /* row4x4-major(ih*iw x oc*kh*kw)  ->  row4-major(oh*ow x oc) */
@@ -74,8 +28,8 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
 
   int src_iw_stride = C4NUM;
   int src_ih_stride = conv_param->input_w_ * C4NUM;
-  int src_kw_stride = input_plane * C4NUM;
-  int src_kh_stride = input_plane * conv_param->kernel_w_ * C4NUM;
+  int src_kw_stride = in_plane4 * C4NUM;
+  int src_kh_stride = in_plane4 * conv_param->kernel_w_ * C4NUM;
   int dst_oh_stride = conv_param->output_w_ * C4NUM;
   int dst_ow_stride = C4NUM;
   int dst_kh_stride = conv_param->dilation_h_ * conv_param->output_w_ * C4NUM;
@@ -153,18 +107,18 @@ void DeConvWeightTransInt8(int8_t *src, int8_t *dst, int input_channel, int outp
   return;
 }
 
-void DeConvPackWeightSum(int8_t *weight, int32_t *weight_sum, int32_t input_zp, int32_t filter_zp, int deep16, int col4,
+void DeConvPackWeightSum(int8_t *weight, int32_t *weight_sum, int32_t input_zp, int32_t filter_zp, int deep, int col4,
                          bool suppport_opt) {
-  /* optimize normal -> same layout */
+  int deep16 = UP_ROUND(deep, C16NUM);  // src_index stride; presumably deep rounded up to C16NUM (confirm)
   for (int c = 0; c < col4; c++) {
     int c4div = c / C4NUM, c4mod = c % C4NUM;
     int32_t value = 0;
-    for (int r = 0; r < deep16; r++) {
+    for (int r = 0; r < deep; r++) {  // sums rows [0, deep) only; rows in [deep, deep16) are not read
       int r16div = r / C16NUM, r16mod = r % C16NUM;
       int src_index = c4div * deep16 * C4NUM + r16div * C4NUM * C16NUM + c4mod * C16NUM + r16mod;
       value += weight[src_index];
     }
-    weight_sum[c] = filter_zp * input_zp * deep16 - value * input_zp;
+    weight_sum[c] = filter_zp * input_zp * deep - value * input_zp;  // zero-point term scaled by deep, not deep16
   }
   return;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
index e8deb78cda8..84d4a39674e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
@@ -30,10 +30,20 @@ using mindspore::schema::ActivationType_HSWISH;
 using mindspore::schema::ActivationType_LEAKY_RELU;
 using mindspore::schema::ActivationType_RELU;
 using mindspore::schema::ActivationType_RELU6;
+using mindspore::schema::ActivationType_SWISH;
 using mindspore::schema::PrimitiveType_Activation;
 
 namespace mindspore::kernel {
-int ActivationFp16CPUKernel::Init() { return RET_OK; }
+int ActivationFp16CPUKernel::Init() {  // fails early when type_ is not one of the types listed below
+  if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 &&
+      type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID &&
+      type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH &&
+      type_ != schema::ActivationType_SWISH) {  // SWISH is dispatched to SwishFp16 in DoActivation
+    MS_LOG(ERROR) << "Activation fp16 not support type: " << type_;
+    return RET_ERROR;  // type_ matched none of the seven accepted activation types
+  }
+  return RET_OK;
+}
 
 int ActivationFp16CPUKernel::ReSize() { return RET_OK; }
 
@@ -85,6 +95,8 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) {
     error_code = TanhFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count);
   } else if (type_ == schema::ActivationType_HSWISH) {
     error_code = HSwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count);
+  } else if (type_ == schema::ActivationType_SWISH) {
+    error_code = SwishFp16(fp16_input_ + stride * task_id, fp16_output_ + stride * task_id, count);
   } else {
     MS_LOG(ERROR) << "Activation fp16 not support type: " << type_;
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
index 815de47d625..b27daf09ccf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
@@ -79,32 +79,32 @@ void DeConvWinogradFp16CPUKernel::FreeResizeBuf() {
 }
 
 void DeConvWinogradFp16CPUKernel::FreeDeconvParam() {
-  for (int i = 0; i < deconv_param_->compute_size_; i++) {
-    DeConvComputeUnit &unit = deconv_param_->compute_units_[i];
-
-    if (unit.weight_ != nullptr) {
-      free(unit.weight_);
-      unit.weight_ = nullptr;
-    }
-
-    if (unit.use_winograd_) {
-      if (unit.winograd_.AT_ != nullptr) {
-        free(unit.winograd_.AT_);
-        unit.winograd_.AT_ = nullptr;
-      }
-      if (unit.winograd_.BT_ != nullptr) {
-        free(unit.winograd_.BT_);
-        unit.winograd_.BT_ = nullptr;
-      }
-    }
-  }
-
-  if (deconv_param_->compute_units_ != nullptr) {
-    free(deconv_param_->compute_units_);
-    deconv_param_->compute_units_ = nullptr;
-  }
-
   if (deconv_param_ != nullptr) {
+    if (deconv_param_->compute_units_ != nullptr) {  // check before indexing the array in the loop below
+      for (int i = 0; i < deconv_param_->compute_size_; i++) {
+        DeConvComputeUnit &unit = deconv_param_->compute_units_[i];
+        // Release the per-unit weight buffer.
+        if (unit.weight_ != nullptr) {
+          free(unit.weight_);
+          unit.weight_ = nullptr;
+        }
+        // AT_/BT_ are only released for units flagged use_winograd_.
+        if (unit.use_winograd_) {
+          if (unit.winograd_.AT_ != nullptr) {
+            free(unit.winograd_.AT_);
+            unit.winograd_.AT_ = nullptr;
+          }
+          if (unit.winograd_.BT_ != nullptr) {
+            free(unit.winograd_.BT_);
+            unit.winograd_.BT_ = nullptr;
+          }
+        }
+      }
+      // Every unit has been released; now free the unit array itself.
+      free(deconv_param_->compute_units_);
+      deconv_param_->compute_units_ = nullptr;
+    }
+
     delete (deconv_param_);
     deconv_param_ = nullptr;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc
index b28091433a2..5abe9005b68 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc
@@ -33,7 +33,17 @@ using mindspore::schema::ActivationType_SWISH;
 using mindspore::schema::PrimitiveType_Activation;
 
 namespace mindspore::kernel {
-int ActivationCPUKernel::Init() { return RET_OK; }
+int ActivationCPUKernel::Init() {  // fails early when type_ is not one of the types listed below
+  if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 &&
+      type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID &&
+      type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH &&
+      type_ != schema::ActivationType_SWISH && type_ != schema::ActivationType_HSIGMOID &&
+      type_ != schema::ActivationType_HARD_TANH) {  // nine activation types are accepted in total
+    MS_LOG(ERROR) << "Activation fp32 not support type: " << type_;
+    return RET_ERROR;  // type_ matched none of the accepted activation types
+  }
+  return RET_OK;
+}
 
 int ActivationCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
index 5b30b09adac..69f111f30df 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
@@ -150,7 +150,7 @@ int DeConvInt8CPUKernel::InitBiasWeight() {
     MS_LOG(ERROR) << "deconv int8 malloc weight_ptr_ error!";
     return RET_ERROR;
   }
-  memset(weight_ptr_, static_cast<int8_t>(conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_), size);
+  memset(weight_ptr_, 0, size);
   DeConvWeightTransInt8(reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData()), weight_ptr_,
                         conv_param_->input_channel_, conv_param_->output_channel_,
                         conv_param_->kernel_h_ * conv_param_->kernel_w_, support_optimize_);
@@ -163,8 +163,8 @@ int DeConvInt8CPUKernel::InitBiasWeight() {
   }
   memset(weight_sum_, 0, size * sizeof(int32_t));
   DeConvPackWeightSum(weight_ptr_, weight_sum_, conv_param_->conv_quant_arg_.input_quant_args_[0].zp_,
-                      conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->deep_, C16NUM),
-                      size, support_optimize_);
+                      conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, matmul_param_->deep_, size,
+                      support_optimize_);
 
   return RET_OK;
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc
index 476dc6b52f8..bcb078f30d9 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/deconv_int8_tests.cc
@@ -271,41 +271,6 @@ TEST_F(TestDeconvInt8, MatMulOptTest1) {
   CompareOutputData(tmp_output, correct_tmp_output, 12 * 3 * 8, 0);
 }
 
-TEST_F(TestDeconvInt8, PostAddTest1) {
-  int32_t in[] = {
-    -4956,  -3923,  868,   -8880, -4089, -5179, -4526, -4527, -10464, 99,    -5826, -2995, -4519, -4519, -10509, -2505,
-    -11272, 434,    -4522, -4523, -5287, -8936, -878,  373,   -4528,  -4529, -1960, -6589, 1688,  2287,  -8059,  926,
-    -2506,  -6972,  -2834, -8281, -8118, -3110, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
-    -4520,  -4521,  -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
-    1578,   2231,   -4522, -4523, -4524, -4525, -4526, -4527, -8449,  -990,  -4519, -4519, -4519, -4519, -4519,  -4519,
-    -4303,  -10293, -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
-    -7025,  924,    -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519,
-    -4520,  -4521,  -4522, -4523, -4524, -4525, -4526, -4527, -4528,  -4529, -4519, -4519, -4519, -4519, -4519,  -4519};
-  int8_t co[] = {-8,  11,  99,  -80,  8,  -12, 0,  0,   112, 124, -109, 85, -24,  28, 0,   0,  -110,
-                 37,  -72, 65,  -124, 91, 0,   0,  -14, -81, 67,  90,   4,  -106, 0,  0,   47, -38,
-                 114, 125, -65, 100,  0,  0,   37, -45, 31,  -69, -66,  26, 0,    0,  -46, 100};
-  int32_t bias[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-  int8_t out[50] = {0};
-  double multiplier = 0.0183649725490196;
-  int32_t quant_multiplier;
-  int32_t left_shift;
-  int32_t right_shift;
-  QuantizeRoundParameter(multiplier, &quant_multiplier, &left_shift, &right_shift);
-  int32_t zp = 83;
-  PostFuncInt8C8(in, bias, out, 10, 5, quant_multiplier, left_shift, right_shift, zp, -128, 127);
-  CompareOutputData(out, co, 50, 1);
-
-  int8_t co_relu[] = {0, 11, 99, 0, 8, 0, 0, 0,  112, 124, 0,   85, 0,   28, 0, 0,  0, 37, 0, 65, 0,  91, 0, 0, 0,
-                      0, 67, 90, 4, 0, 0, 0, 47, 0,   114, 125, 0,  100, 0,  0, 37, 0, 31, 0, 0,  26, 0,  0, 0, 100};
-  PostFuncInt8C8(in, bias, out, 10, 5, quant_multiplier, left_shift, right_shift, zp, 0, 127);
-  CompareOutputData(out, co_relu, 50, 1);
-
-  int8_t co_relu6[] = {0, 6, 6, 0, 6, 0, 0, 0, 6, 6, 0, 6, 0, 6, 0, 0, 0, 6, 0, 6, 0, 6, 0, 0, 0,
-                       0, 6, 6, 4, 0, 0, 0, 6, 0, 6, 6, 0, 6, 0, 0, 6, 0, 6, 0, 0, 6, 0, 0, 0, 6};
-  PostFuncInt8C8(in, bias, out, 10, 5, quant_multiplier, left_shift, right_shift, zp, 0, 6);
-  CompareOutputData(out, co_relu6, 50, 1);
-}
-
 int DeConvInt8TestInit1(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outputs_,
                         ConvParameter *conv_param, int8_t **correct) {
   /* float data from deconv fp32 testcase : DeConvTestInit2 */