!15820 add DeConv2d Coder

From: @zoloft
Reviewed-by: @wangchengyuan,@hangangqiang
Signed-off-by: @wangchengyuan
This commit is contained in:
mindspore-ci-bot 2021-04-29 09:14:27 +08:00 committed by Gitee
commit 7486574169
6 changed files with 387 additions and 1 deletions

View File

@ -99,6 +99,7 @@ set(CODER_OPCODERS_SRC
${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/splice_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc
#### nnacl int8 coder
${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/add_int8_coder.cc
@ -188,11 +189,13 @@ set(LITE_KERNEL_SRC
${NNACL_DIR}/fp32/winograd_utils.c
${NNACL_DIR}/fp32/pack_fp32.c
${NNACL_DIR}/fp32/arithmetic_fp32.c
${NNACL_DIR}/fp32/deconv_fp32.c
${NNACL_DIR}/fp32/matmul_fp32.c
${NNACL_DIR}/fp32/common_func_fp32.c
${NNACL_DIR}/int8/quantize.c
${NNACL_DIR}/int8/pack_int8.c
${NNACL_DIR}/int8/matmul_int8.c
${NNACL_DIR}/int8/fixed_point.c
${NNACL_DIR}/fp32/matmul_fp32.c
${NNACL_DIR}/int8/arithmetic_int8.c
${NNACL_DIR}/int8/add_int8.c
${NNACL_DIR}/int8/concat_int8.c
@ -288,6 +291,8 @@ set(LITE_KERNEL_SRC
# SSE intrinsic kernels; PostFuncBiasReluC8/C4 are added by this commit for the
# DeConv2d post-processing step (bias + activation over C8/C4 channel groups).
if("${X86_64_SIMD}" STREQUAL "sse")
set(SSE_SRC
${NNACL_DIR}/intrinsics/sse/MatMul_Sse.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC8.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC4.c
)
# These intrinsic sources must be compiled as C, not C++.
set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C)
endif()
@ -299,6 +304,8 @@ if("${X86_64_SIMD}" STREQUAL "avx")
set(AVX_SRC
${NNACL_DIR}/intrinsics/avx/common_utils.c
${NNACL_DIR}/intrinsics/sse/MatMul_Sse.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC8.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC4.c
${NNACL_DIR}/assembly/avx/MatmulAvx.S
)
set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C)

View File

@ -7,6 +7,7 @@ set(WRAPPER_SRC
${WRAPPER_DIR}/base/optimize_handler_wrapper.c
${WRAPPER_DIR}/fp32/matmul_fp32_wrapper.c
${WRAPPER_DIR}/fp32/arithmetic_fp32_wrapper.c
${WRAPPER_DIR}/fp32/deconvolution_fp32_wrapper.c
${WRAPPER_DIR}/int8/matmul_int8_wrapper.c
${WRAPPER_DIR}/int8/add_int8_wrapper.c
${WRAPPER_DIR}/int8/concat_int8_wrapper.c

View File

@ -0,0 +1,196 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.h"
#include <memory>
#include <string>
#include <vector>
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include "nnacl/fp32/winograd_utils.h"
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "coder/opcoders/parallel.h"
#include "src/common/version_manager.h"
#include "coder/opcoders/nnacl/dequant/de_quant.h"
using mindspore::schema::PrimitiveType_Conv2dTransposeFusion;
namespace mindspore::lite::micro::nnacl {
// Allocates the per-run workspace buffers (packed input, matmul scratch,
// packed output) sized from the matmul shapes computed in InitParam().
int DeConvolutionFP32Coder::InitRunBuf() {
  // C8-packed matmul output: one rounded-up C8 channel group per output pixel.
  pack_output_size_ = UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float);
  packed_output_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, pack_output_size_, kWorkspace));
  MS_CHECK_PTR(packed_output_);
  // ARM32 kernels tile matmul rows by 4; every other target tiles by 12.
  const int row_tile = (target_ == kARM32A) ? matmul_param_.row_4_ : matmul_param_.row_12_;
  tmp_buffer_size_ = row_tile * matmul_param_.col_8_ * sizeof(float);
  tmp_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, tmp_buffer_size_, kWorkspace));
  MS_CHECK_PTR(tmp_buffer_);
  pack_input_size_ = row_tile * matmul_param_.deep_ * sizeof(float);
  packed_input_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, pack_input_size_, kWorkspace));
  MS_CHECK_PTR(packed_input_);
  return RET_OK;
}
// Derives the matmul view of the deconvolution:
// [input_plane x in_channel] x [in_channel x (out_channel * kernel_plane)].
int DeConvolutionFP32Coder::InitParam() {
  // Cache the spatial plane sizes of input, kernel and output.
  input_plane_ = conv_param_->input_h_ * conv_param_->input_w_;
  kernel_plane_ = conv_param_->kernel_w_ * conv_param_->kernel_h_;
  output_plane_ = conv_param_->output_h_ * conv_param_->output_w_;
  // Logical matmul dimensions.
  matmul_param_.deep_ = conv_param_->input_channel_;
  matmul_param_.row_ = input_plane_;
  matmul_param_.col_ = conv_param_->output_channel_ * kernel_plane_;
  // Tile-rounded dimensions consumed by the packed matmul kernels.
  matmul_param_.row_4_ = UP_ROUND(matmul_param_.row_, C4NUM);
  matmul_param_.row_12_ = UP_ROUND(matmul_param_.row_, C12NUM);
  matmul_param_.col_8_ = UP_ROUND(conv_param_->output_channel_, C8NUM) * kernel_plane_;
  return RET_OK;
}
// One-time preparation: base init, online weight/bias packing, then Resize()
// to derive the matmul shapes and allocate workspaces.
int DeConvolutionFP32Coder::Prepare(CoderContext *const context) {
  const int init_ret = Conv2DBaseCoder::Init();
  MS_CHECK_RET_CODE(init_ret, "Conv2DBaseCoder::Init() failed.");
  const int weight_ret = InitWeightBias(context);
  MS_CHECK_RET_CODE(weight_ret, "Init weight bias failed.");
  return Resize();
}
// Recomputes shape-dependent state: re-runs base init, refreshes the matmul
// parameters (InitParam) and re-allocates the run buffers (InitRunBuf).
int DeConvolutionFP32Coder::Resize() {
MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "init failed.");
MS_CHECK_RET_CODE(InitParam(), "init param failed.");
MS_CHECK_RET_CODE(InitRunBuf(), "init run buffer failed.");
return RET_OK;
}
// Emits the online init code that allocates and packs the filter (and optional
// bias) for the generated target. The filter is repacked from NHWC into
// C8HWN8 layout for the C8-tiled deconv matmul.
int DeConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
  int kernel_h = filter_tensor_->Height();
  int kernel_w = filter_tensor_->Width();
  int in_channel = filter_tensor_->Channel();
  int out_channel = filter_tensor_->Batch();
  conv_param_->input_channel_ = in_channel;
  conv_param_->output_channel_ = out_channel;
  if (input_tensors_.size() == kInputSize2) {
    // Bias buffer is rounded up to C4 so vectorized post functions can read whole groups.
    bias_data_size_ = UP_ROUND(out_channel, C4NUM) * sizeof(float);
    packed_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(packed_bias_);
  }
  int kernel_plane = kernel_h * kernel_w;
  int pack_weight_size = in_channel * kernel_plane;
  pack_weight_size_ = pack_weight_size * UP_ROUND(out_channel, C8NUM) * sizeof(float);
  packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
  MS_CHECK_PTR(packed_weight_);
  NNaclFp32Serializer init_code;
  if (input_tensors_.size() == kInputSize2) {
    init_code.CodeMallocExpression(packed_bias_, bias_data_size_);
    // Fix: zero exactly the bias allocation. The original passed pack_weight_size_,
    // which over-runs the bias_data_size_ buffer in the generated code.
    init_code.CodeFunction("memset", packed_bias_, 0, bias_data_size_);
    init_code.CodeFunction("memcpy", packed_bias_, bias_tensor_, out_channel * sizeof(float));
  }
  init_code.CodeMallocExpression(packed_weight_, pack_weight_size_);
  init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_);
  init_code.CodeFunction("PackNHWCToC8HWN8Fp32", filter_tensor_, packed_weight_, in_channel, kernel_plane, out_channel);
  context->AppendInitCode(init_code.str());
  return RET_OK;
}
// Emits the per-inference code: collects the required headers/sources for the
// target, zeroes the workspaces, and generates a per-batch pack + DeConvFp32Run
// call (serial or via the thread pool).
int DeConvolutionFP32Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "wrapper/fp32/deconvolution_fp32_wrapper.h",
            "nnacl/fp32/conv_common_fp32.h",
            "nnacl/pack.h",
            "nnacl/fp32/common_func_fp32.h",
            "nnacl/base/minimal_filtering_generator.h",
            "nnacl/fp32/matmul_fp32.h",
            "nnacl/conv_parameter.h",
            "nnacl/matmul_parameter.h",
            "nnacl/op_base.h",
          },
          {
            "deconvolution_fp32_wrapper.c",
            "common_func.c",
            "conv_common_fp32.c",
            "matmul_fp32.c",
            "pack_fp32.c",
            "deconv_fp32.c",
            "minimal_filter_generator.c",
          });
  if (target_ == kARM32A) {
    Collect(context, {}, {},
            {
              "MatmulFp32.S",
              "MatmulFp32Opt.S",
              "PreSum4x16Int8Peroc.S",
              "PreSum4x16Int8Pert.S",
              "IndirectGemmInt16to32_8x4.S",
              "MatmulInt8.S",
              "MatmulFp32Opt12x4.S",
            });
  } else if (target_ == kARM64) {
    // Fix: "PreSum4x16Int8Peroc.S" was listed twice in this collection.
    Collect(context, {}, {},
            {
              "MatmulFp32.S",
              "MatmulFp32Opt.S",
              "PreSum4x16Int8Peroc.S",
              "MatVecMulFp32.S",
              "PreSum4x16Int8Pert.S",
              "IndirectGemmInt16to32_8x4.S",
              "MatmulInt8.S",
            });
  }
  NNaclFp32Serializer code;
  // Zero the packed workspaces once before the per-batch loop.
  code.CodeFunction("memset", packed_input_, "0", pack_input_size_);
  code.CodeFunction("memset", packed_output_, "0", pack_output_size_);
  code.CodeFunction("memset", tmp_buffer_, "0", tmp_buffer_size_);
  // Serialize the parameter structs the generated wrapper call reads.
  code.CodeStruct("conv_parameter", *conv_param_);
  code.CodeStruct("matmul_parameter", matmul_param_);
  std::string src_in_ptr_str = allocator_->GetRuntimeAddr(input_tensor_);
  std::string src_out_ptr_str = allocator_->GetRuntimeAddr(output_tensor_);
  for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
    // Fix: insert an explicit "+" between the runtime address and the batch
    // offset. The original concatenated them directly (e.g. "addr0"), which is
    // not a valid C expression in the generated code.
    input_ptr_ = src_in_ptr_str + "+" + std::to_string(batch_index * input_plane_ * conv_param_->input_channel_);
    output_ptr_ = src_out_ptr_str + "+" + std::to_string(batch_index * output_plane_ * conv_param_->output_channel_);
    if (target_ == kARM32A) {
      // ARM32 matmul consumes 4-row packed input; other targets use 12-row packing.
      code.CodeFunction("RowMajor2Col4Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_);
    } else {
      code.CodeFunction("RowMajor2Col12Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_);
    }
    code.CodeBaseStruct("DeConvFp32Args", kRunArgs, packed_input_, packed_weight_, packed_bias_, packed_output_,
                        output_ptr_, tmp_buffer_, "&matmul_parameter", "&conv_parameter");
    if (!support_parallel_) {
      code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId);
    } else {
      code.CodeFunction(kParallelLaunch, gThreadPool, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_");
    }
  }
  context->AppendCode(code.str());
  return RET_OK;
}
// Register this coder for float32 Conv2dTransposeFusion on all supported targets.
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2dTransposeFusion,
CPUOpCoderCreator<DeConvolutionFP32Coder>);
} // namespace mindspore::lite::micro::nnacl

View File

@ -0,0 +1,65 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_
#include <vector>
#include <string>
#include "nnacl/conv_parameter.h"
#include "coder/opcoders/base/conv2d_base_coder.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "nnacl/fp32/deconv_fp32.h"
#include "nnacl/fp32/matmul_fp32.h"
namespace mindspore::lite::micro::nnacl {
// Micro-coder that generates C code for float32 2D transposed convolution
// (Conv2dTransposeFusion), implemented as a packed matmul plus a C8 post step.
class DeConvolutionFP32Coder final : public Conv2DBaseCoder {
public:
DeConvolutionFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
// One-time setup: base init, online weight/bias packing, shape derivation.
int Prepare(CoderContext *const context) override;
// Emits the per-inference pack + DeConvFp32Run code into the context.
int DoCode(CoderContext *const context) override;
~DeConvolutionFP32Coder() override = default;
private:
int InitWeightBias(CoderContext *const context);
int Resize();
int InitRunBuf();
int InitParam();
// Matmul view of the deconv; shapes filled in by InitParam().
MatMulParameter matmul_param_{};
// Byte sizes of the workspace and online-packed buffers.
size_t pack_output_size_{0};
size_t tmp_buffer_size_{0};
size_t pack_input_size_{0};
size_t bias_data_size_{0};
size_t pack_weight_size_{0};
// Cached spatial plane sizes (h * w) of input, kernel and output.
int input_plane_{0};
int kernel_plane_{0};
int output_plane_{0};
// Online-packed weights/bias and per-run workspaces (owned by allocator_).
float *packed_bias_{nullptr};
float *packed_weight_{nullptr};
float *packed_input_{nullptr};
float *packed_output_{nullptr};
float *tmp_buffer_{nullptr};
// Runtime address expressions (as strings) for the current batch slice.
std::string input_ptr_;
std::string output_ptr_;
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_

View File

@ -0,0 +1,69 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "wrapper/fp32/deconvolution_fp32_wrapper.h"
#include "nnacl/fp32/deconv_fp32.h"
#include "nnacl/fp32/matmul_fp32.h"
// Runs one thread's share of the deconv: a tiled matmul over this task's
// C8 output-channel groups followed by the DeConv post step. Work is split
// across task ids along the output-channel axis in C8 (8-channel) groups.
int DoDeconvFp32(const float *packed_input, const float *packed_weight, const float *packed_bias, float *packed_output,
float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param,
const ConvParameter *conv_param, int task_id) {
int thread_count = MSMIN(conv_param->thread_num_, UP_DIV(conv_param->output_channel_, C8NUM));
int thread_stride = UP_DIV(UP_DIV(conv_param->output_channel_, C8NUM), thread_count);
// oc: number of C8 groups assigned to this task (clipped at the tail).
int res_stride = UP_DIV(conv_param->output_channel_, C8NUM) - task_id * thread_stride;
int oc = MSMIN(thread_stride, res_stride);
// oc_res: number of raw channels for this task (handles output_channel_ not a multiple of 8).
int cur_stride = thread_stride * C8NUM;
res_stride = conv_param->output_channel_ - task_id * thread_stride * C8NUM;
int oc_res = MSMIN(cur_stride, res_stride);
if (oc <= 0 || oc_res <= 0) {
// This task id has no channel groups assigned; nothing to do.
return NNACL_OK;
}
int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
int output_plane = conv_param->output_h_ * conv_param->output_w_;
#if defined(ENABLE_ARM32)
// ARM32 matmul tiles rows by 4; offsets into weights/scratch are per-task.
float *tmp_buffer = tmp_ori_buffer + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->row_4_;
MatMulOpt(packed_input, packed_weight + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->deep_,
tmp_buffer, NULL, ActType_No, matmul_param->deep_, matmul_param->row_4_, oc * C8NUM * kernel_plane,
matmul_param->col_, OutType_C8);
#else
// Other targets tile matmul rows by 12.
float *tmp_buffer = tmp_ori_buffer + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->row_12_;
MatMulOpt(packed_input, packed_weight + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->deep_,
tmp_buffer, NULL, ActType_No, matmul_param->deep_, matmul_param->row_12_, oc * C8NUM * kernel_plane,
matmul_param->col_, OutType_C8);
#endif
// Post step: scatters this task's C8 matmul result into the output buffer.
// NOTE(review): assumes DeConvPostFp32C8 applies the bias slice passed here — confirm against nnacl.
DeConvPostFp32C8(tmp_buffer, packed_output + task_id * thread_stride * C8NUM * output_plane,
packed_bias + thread_stride * task_id * C8NUM, output + task_id * thread_stride * C8NUM, oc_res,
conv_param);
return NNACL_OK;
}
// Thread-pool entry point: unpacks the argument bundle and runs this task's
// slice of the deconvolution.
// Fix: propagate DoDeconvFp32's status instead of discarding it and returning
// NNACL_OK unconditionally.
int DeConvFp32Run(void *cdata, int task_id) {
  DeConvFp32Args *args = (DeConvFp32Args *)cdata;
  const MatMulParameter *matmul_param = args->matmul_param_;
  const ConvParameter *conv_param = args->conv_param_;
  const float *packed_input = args->packed_input_;
  const float *packed_weight = args->packed_weight_;
  const float *packed_bias = args->packed_bias_;
  float *packed_output = args->packed_output_;
  float *output = args->output_;
  float *tmp_buffer = args->tmp_buffer_;
  return DoDeconvFp32(packed_input, packed_weight, packed_bias, packed_output, output, tmp_buffer, matmul_param,
                      conv_param, task_id);
}

View File

@ -0,0 +1,48 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_
#define MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_
#include "nnacl/errorcode.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/matmul_parameter.h"
// Argument bundle passed through the thread-pool callback to DoDeconvFp32.
typedef struct {
const float *packed_input_; // input packed for the tiled matmul
const float *packed_weight_; // filter packed to C8HWN8 layout
const float *packed_bias_; // bias rounded up to a C4 multiple (may be unused if no bias)
float *packed_output_; // C8-packed matmul output workspace
float *output_; // final NHWC output buffer
float *tmp_buffer_; // per-task matmul scratch space
const MatMulParameter *matmul_param_;
const ConvParameter *conv_param_;
} DeConvFp32Args;
#ifdef __cplusplus
extern "C" {
#endif
// Runs one task's slice (C8 output-channel groups) of the deconvolution.
int DoDeconvFp32(const float *packed_input, const float *packed_weight, const float *packed_bias, float *packed_output,
float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param,
const ConvParameter *conv_param, int task_id);
// Thread-pool entry point; cdata must point to a DeConvFp32Args.
int DeConvFp32Run(void *cdata, int task_id);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_