!15820 add DeConv2d Coder

From: @zoloft
Reviewed-by: @wangchengyuan,@hangangqiang
Signed-off-by: @wangchengyuan
This commit is contained in:
mindspore-ci-bot 2021-04-29 09:14:27 +08:00 committed by Gitee
commit 7486574169
6 changed files with 387 additions and 1 deletions

View File

@ -99,6 +99,7 @@ set(CODER_OPCODERS_SRC
${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/splice_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/exp_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.cc
#### nnacl int8 coder
${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/add_int8_coder.cc
@ -188,11 +189,13 @@ set(LITE_KERNEL_SRC
${NNACL_DIR}/fp32/winograd_utils.c
${NNACL_DIR}/fp32/pack_fp32.c
${NNACL_DIR}/fp32/arithmetic_fp32.c
${NNACL_DIR}/fp32/deconv_fp32.c
${NNACL_DIR}/fp32/matmul_fp32.c
${NNACL_DIR}/fp32/common_func_fp32.c
${NNACL_DIR}/int8/quantize.c
${NNACL_DIR}/int8/pack_int8.c
${NNACL_DIR}/int8/matmul_int8.c
${NNACL_DIR}/int8/fixed_point.c
${NNACL_DIR}/fp32/matmul_fp32.c
${NNACL_DIR}/int8/arithmetic_int8.c
${NNACL_DIR}/int8/add_int8.c
${NNACL_DIR}/int8/concat_int8.c
@ -288,6 +291,8 @@ set(LITE_KERNEL_SRC
# SSE intrinsic kernels; PostFuncBiasReluC8/C4 are added by this commit for the
# DeConv2d post-processing step (bias + activation over C8/C4 channel groups).
if("${X86_64_SIMD}" STREQUAL "sse")
set(SSE_SRC
${NNACL_DIR}/intrinsics/sse/MatMul_Sse.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC8.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC4.c
)
# These intrinsic sources must be compiled as C, not C++.
set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C)
endif()
@ -299,6 +304,8 @@ if("${X86_64_SIMD}" STREQUAL "avx")
set(AVX_SRC
${NNACL_DIR}/intrinsics/avx/common_utils.c
${NNACL_DIR}/intrinsics/sse/MatMul_Sse.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC8.c
${NNACL_DIR}/intrinsics/sse/PostFuncBiasReluC4.c
${NNACL_DIR}/assembly/avx/MatmulAvx.S
)
set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C)

View File

@ -7,6 +7,7 @@ set(WRAPPER_SRC
${WRAPPER_DIR}/base/optimize_handler_wrapper.c
${WRAPPER_DIR}/fp32/matmul_fp32_wrapper.c
${WRAPPER_DIR}/fp32/arithmetic_fp32_wrapper.c
${WRAPPER_DIR}/fp32/deconvolution_fp32_wrapper.c
${WRAPPER_DIR}/int8/matmul_int8_wrapper.c
${WRAPPER_DIR}/int8/add_int8_wrapper.c
${WRAPPER_DIR}/int8/concat_int8_wrapper.c

View File

@ -0,0 +1,196 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "coder/opcoders/nnacl/fp32/deconv2d_fp32_coder.h"
#include <memory>
#include <string>
#include <vector>
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include "nnacl/fp32/winograd_utils.h"
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "coder/opcoders/parallel.h"
#include "src/common/version_manager.h"
#include "coder/opcoders/nnacl/dequant/de_quant.h"
using mindspore::schema::PrimitiveType_Conv2dTransposeFusion;
namespace mindspore::lite::micro::nnacl {
// Allocates the per-run workspace buffers (packed input, matmul scratch,
// packed output) sized from the matmul shapes computed in InitParam().
int DeConvolutionFP32Coder::InitRunBuf() {
  // C8-packed matmul output: one rounded-up C8 channel group per output pixel.
  pack_output_size_ = UP_ROUND(conv_param_->output_channel_, C8NUM) * output_plane_ * sizeof(float);
  packed_output_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, pack_output_size_, kWorkspace));
  MS_CHECK_PTR(packed_output_);
  // ARM32 kernels tile matmul rows by 4; every other target tiles by 12.
  const int row_tile = (target_ == kARM32A) ? matmul_param_.row_4_ : matmul_param_.row_12_;
  tmp_buffer_size_ = row_tile * matmul_param_.col_8_ * sizeof(float);
  tmp_buffer_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, tmp_buffer_size_, kWorkspace));
  MS_CHECK_PTR(tmp_buffer_);
  pack_input_size_ = row_tile * matmul_param_.deep_ * sizeof(float);
  packed_input_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, pack_input_size_, kWorkspace));
  MS_CHECK_PTR(packed_input_);
  return RET_OK;
}
// Derives the matmul view of the deconvolution:
// [input_plane x in_channel] x [in_channel x (out_channel * kernel_plane)].
int DeConvolutionFP32Coder::InitParam() {
  // Cache the spatial plane sizes of input, kernel and output.
  input_plane_ = conv_param_->input_h_ * conv_param_->input_w_;
  kernel_plane_ = conv_param_->kernel_w_ * conv_param_->kernel_h_;
  output_plane_ = conv_param_->output_h_ * conv_param_->output_w_;
  // Logical matmul dimensions.
  matmul_param_.deep_ = conv_param_->input_channel_;
  matmul_param_.row_ = input_plane_;
  matmul_param_.col_ = conv_param_->output_channel_ * kernel_plane_;
  // Tile-rounded dimensions consumed by the packed matmul kernels.
  matmul_param_.row_4_ = UP_ROUND(matmul_param_.row_, C4NUM);
  matmul_param_.row_12_ = UP_ROUND(matmul_param_.row_, C12NUM);
  matmul_param_.col_8_ = UP_ROUND(conv_param_->output_channel_, C8NUM) * kernel_plane_;
  return RET_OK;
}
// One-time preparation: base init, online weight/bias packing, then Resize()
// to derive the matmul shapes and allocate workspaces.
int DeConvolutionFP32Coder::Prepare(CoderContext *const context) {
  const int init_ret = Conv2DBaseCoder::Init();
  MS_CHECK_RET_CODE(init_ret, "Conv2DBaseCoder::Init() failed.");
  const int weight_ret = InitWeightBias(context);
  MS_CHECK_RET_CODE(weight_ret, "Init weight bias failed.");
  return Resize();
}
// Recomputes shape-dependent state: re-runs base init, refreshes the matmul
// parameters (InitParam) and re-allocates the run buffers (InitRunBuf).
int DeConvolutionFP32Coder::Resize() {
MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "init failed.");
MS_CHECK_RET_CODE(InitParam(), "init param failed.");
MS_CHECK_RET_CODE(InitRunBuf(), "init run buffer failed.");
return RET_OK;
}
// Emits the online init code that allocates and packs the filter (and optional
// bias) for the generated target. The filter is repacked from NHWC into
// C8HWN8 layout for the C8-tiled deconv matmul.
int DeConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
  int kernel_h = filter_tensor_->Height();
  int kernel_w = filter_tensor_->Width();
  int in_channel = filter_tensor_->Channel();
  int out_channel = filter_tensor_->Batch();
  conv_param_->input_channel_ = in_channel;
  conv_param_->output_channel_ = out_channel;
  if (input_tensors_.size() == kInputSize2) {
    // Bias buffer is rounded up to C4 so vectorized post functions can read whole groups.
    bias_data_size_ = UP_ROUND(out_channel, C4NUM) * sizeof(float);
    packed_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(packed_bias_);
  }
  int kernel_plane = kernel_h * kernel_w;
  int pack_weight_size = in_channel * kernel_plane;
  pack_weight_size_ = pack_weight_size * UP_ROUND(out_channel, C8NUM) * sizeof(float);
  packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
  MS_CHECK_PTR(packed_weight_);
  NNaclFp32Serializer init_code;
  if (input_tensors_.size() == kInputSize2) {
    init_code.CodeMallocExpression(packed_bias_, bias_data_size_);
    // Fix: zero exactly the bias allocation. The original passed pack_weight_size_,
    // which over-runs the bias_data_size_ buffer in the generated code.
    init_code.CodeFunction("memset", packed_bias_, 0, bias_data_size_);
    init_code.CodeFunction("memcpy", packed_bias_, bias_tensor_, out_channel * sizeof(float));
  }
  init_code.CodeMallocExpression(packed_weight_, pack_weight_size_);
  init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_);
  init_code.CodeFunction("PackNHWCToC8HWN8Fp32", filter_tensor_, packed_weight_, in_channel, kernel_plane, out_channel);
  context->AppendInitCode(init_code.str());
  return RET_OK;
}
// Emits the per-inference code: collects the required headers/sources for the
// target, zeroes the workspaces, and generates a per-batch pack + DeConvFp32Run
// call (serial or via the thread pool).
int DeConvolutionFP32Coder::DoCode(CoderContext *const context) {
  Collect(context,
          {
            "wrapper/fp32/deconvolution_fp32_wrapper.h",
            "nnacl/fp32/conv_common_fp32.h",
            "nnacl/pack.h",
            "nnacl/fp32/common_func_fp32.h",
            "nnacl/base/minimal_filtering_generator.h",
            "nnacl/fp32/matmul_fp32.h",
            "nnacl/conv_parameter.h",
            "nnacl/matmul_parameter.h",
            "nnacl/op_base.h",
          },
          {
            "deconvolution_fp32_wrapper.c",
            "common_func.c",
            "conv_common_fp32.c",
            "matmul_fp32.c",
            "pack_fp32.c",
            "deconv_fp32.c",
            "minimal_filter_generator.c",
          });
  if (target_ == kARM32A) {
    Collect(context, {}, {},
            {
              "MatmulFp32.S",
              "MatmulFp32Opt.S",
              "PreSum4x16Int8Peroc.S",
              "PreSum4x16Int8Pert.S",
              "IndirectGemmInt16to32_8x4.S",
              "MatmulInt8.S",
              "MatmulFp32Opt12x4.S",
            });
  } else if (target_ == kARM64) {
    // Fix: "PreSum4x16Int8Peroc.S" was listed twice in this collection.
    Collect(context, {}, {},
            {
              "MatmulFp32.S",
              "MatmulFp32Opt.S",
              "PreSum4x16Int8Peroc.S",
              "MatVecMulFp32.S",
              "PreSum4x16Int8Pert.S",
              "IndirectGemmInt16to32_8x4.S",
              "MatmulInt8.S",
            });
  }
  NNaclFp32Serializer code;
  // Zero the packed workspaces once before the per-batch loop.
  code.CodeFunction("memset", packed_input_, "0", pack_input_size_);
  code.CodeFunction("memset", packed_output_, "0", pack_output_size_);
  code.CodeFunction("memset", tmp_buffer_, "0", tmp_buffer_size_);
  // Serialize the parameter structs the generated wrapper call reads.
  code.CodeStruct("conv_parameter", *conv_param_);
  code.CodeStruct("matmul_parameter", matmul_param_);
  std::string src_in_ptr_str = allocator_->GetRuntimeAddr(input_tensor_);
  std::string src_out_ptr_str = allocator_->GetRuntimeAddr(output_tensor_);
  for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
    // Fix: insert an explicit "+" between the runtime address and the batch
    // offset. The original concatenated them directly (e.g. "addr0"), which is
    // not a valid C expression in the generated code.
    input_ptr_ = src_in_ptr_str + "+" + std::to_string(batch_index * input_plane_ * conv_param_->input_channel_);
    output_ptr_ = src_out_ptr_str + "+" + std::to_string(batch_index * output_plane_ * conv_param_->output_channel_);
    if (target_ == kARM32A) {
      // ARM32 matmul consumes 4-row packed input; other targets use 12-row packing.
      code.CodeFunction("RowMajor2Col4Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_);
    } else {
      code.CodeFunction("RowMajor2Col12Major", input_ptr_, packed_input_, matmul_param_.row_, matmul_param_.deep_);
    }
    code.CodeBaseStruct("DeConvFp32Args", kRunArgs, packed_input_, packed_weight_, packed_bias_, packed_output_,
                        output_ptr_, tmp_buffer_, "&matmul_parameter", "&conv_parameter");
    if (!support_parallel_) {
      code.CodeFunction("DeConvFp32Run", kRunArgsAddr, kDefaultTaskId);
    } else {
      code.CodeFunction(kParallelLaunch, gThreadPool, "DeConvFp32Run", kRunArgsAddr, "conv_parameter.thread_num_");
    }
  }
  context->AppendCode(code.str());
  return RET_OK;
}
// Register this coder for float32 Conv2dTransposeFusion on all supported targets.
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2dTransposeFusion,
CPUOpCoderCreator<DeConvolutionFP32Coder>);
} // namespace mindspore::lite::micro::nnacl

View File

@ -0,0 +1,65 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_
#include <vector>
#include <string>
#include "nnacl/conv_parameter.h"
#include "coder/opcoders/base/conv2d_base_coder.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "nnacl/fp32/deconv_fp32.h"
#include "nnacl/fp32/matmul_fp32.h"
namespace mindspore::lite::micro::nnacl {
// Micro-coder that generates C code for float32 2D transposed convolution
// (Conv2dTransposeFusion), implemented as a packed matmul plus a C8 post step.
class DeConvolutionFP32Coder final : public Conv2DBaseCoder {
public:
DeConvolutionFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
// One-time setup: base init, online weight/bias packing, shape derivation.
int Prepare(CoderContext *const context) override;
// Emits the per-inference pack + DeConvFp32Run code into the context.
int DoCode(CoderContext *const context) override;
~DeConvolutionFP32Coder() override = default;
private:
int InitWeightBias(CoderContext *const context);
int Resize();
int InitRunBuf();
int InitParam();
// Matmul view of the deconv; shapes filled in by InitParam().
MatMulParameter matmul_param_{};
// Byte sizes of the workspace and online-packed buffers.
size_t pack_output_size_{0};
size_t tmp_buffer_size_{0};
size_t pack_input_size_{0};
size_t bias_data_size_{0};
size_t pack_weight_size_{0};
// Cached spatial plane sizes (h * w) of input, kernel and output.
int input_plane_{0};
int kernel_plane_{0};
int output_plane_{0};
// Online-packed weights/bias and per-run workspaces (owned by allocator_).
float *packed_bias_{nullptr};
float *packed_weight_{nullptr};
float *packed_input_{nullptr};
float *packed_output_{nullptr};
float *tmp_buffer_{nullptr};
// Runtime address expressions (as strings) for the current batch slice.
std::string input_ptr_;
std::string output_ptr_;
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_DECONV2D_FP32_CODER_H_

View File

@ -0,0 +1,69 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "wrapper/fp32/deconvolution_fp32_wrapper.h"
#include "nnacl/fp32/deconv_fp32.h"
#include "nnacl/fp32/matmul_fp32.h"
// Runs one thread's share of the deconv: a tiled matmul over this task's
// C8 output-channel groups followed by the DeConv post step. Work is split
// across task ids along the output-channel axis in C8 (8-channel) groups.
int DoDeconvFp32(const float *packed_input, const float *packed_weight, const float *packed_bias, float *packed_output,
float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param,
const ConvParameter *conv_param, int task_id) {
int thread_count = MSMIN(conv_param->thread_num_, UP_DIV(conv_param->output_channel_, C8NUM));
int thread_stride = UP_DIV(UP_DIV(conv_param->output_channel_, C8NUM), thread_count);
// oc: number of C8 groups assigned to this task (clipped at the tail).
int res_stride = UP_DIV(conv_param->output_channel_, C8NUM) - task_id * thread_stride;
int oc = MSMIN(thread_stride, res_stride);
// oc_res: number of raw channels for this task (handles output_channel_ not a multiple of 8).
int cur_stride = thread_stride * C8NUM;
res_stride = conv_param->output_channel_ - task_id * thread_stride * C8NUM;
int oc_res = MSMIN(cur_stride, res_stride);
if (oc <= 0 || oc_res <= 0) {
// This task id has no channel groups assigned; nothing to do.
return NNACL_OK;
}
int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
int output_plane = conv_param->output_h_ * conv_param->output_w_;
#if defined(ENABLE_ARM32)
// ARM32 matmul tiles rows by 4; offsets into weights/scratch are per-task.
float *tmp_buffer = tmp_ori_buffer + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->row_4_;
MatMulOpt(packed_input, packed_weight + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->deep_,
tmp_buffer, NULL, ActType_No, matmul_param->deep_, matmul_param->row_4_, oc * C8NUM * kernel_plane,
matmul_param->col_, OutType_C8);
#else
// Other targets tile matmul rows by 12.
float *tmp_buffer = tmp_ori_buffer + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->row_12_;
MatMulOpt(packed_input, packed_weight + task_id * thread_stride * C8NUM * kernel_plane * matmul_param->deep_,
tmp_buffer, NULL, ActType_No, matmul_param->deep_, matmul_param->row_12_, oc * C8NUM * kernel_plane,
matmul_param->col_, OutType_C8);
#endif
// Post step: scatters this task's C8 matmul result into the output buffer.
// NOTE(review): assumes DeConvPostFp32C8 applies the bias slice passed here — confirm against nnacl.
DeConvPostFp32C8(tmp_buffer, packed_output + task_id * thread_stride * C8NUM * output_plane,
packed_bias + thread_stride * task_id * C8NUM, output + task_id * thread_stride * C8NUM, oc_res,
conv_param);
return NNACL_OK;
}
// Thread-pool entry point: unpacks the argument bundle and runs this task's
// slice of the deconvolution.
// Fix: propagate DoDeconvFp32's status instead of discarding it and returning
// NNACL_OK unconditionally.
int DeConvFp32Run(void *cdata, int task_id) {
  DeConvFp32Args *args = (DeConvFp32Args *)cdata;
  const MatMulParameter *matmul_param = args->matmul_param_;
  const ConvParameter *conv_param = args->conv_param_;
  const float *packed_input = args->packed_input_;
  const float *packed_weight = args->packed_weight_;
  const float *packed_bias = args->packed_bias_;
  float *packed_output = args->packed_output_;
  float *output = args->output_;
  float *tmp_buffer = args->tmp_buffer_;
  return DoDeconvFp32(packed_input, packed_weight, packed_bias, packed_output, output, tmp_buffer, matmul_param,
                      conv_param, task_id);
}

View File

@ -0,0 +1,48 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_
#define MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_
#include "nnacl/errorcode.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/matmul_parameter.h"
// Argument bundle passed through the thread-pool callback to DoDeconvFp32.
typedef struct {
const float *packed_input_; // input packed for the tiled matmul
const float *packed_weight_; // filter packed to C8HWN8 layout
const float *packed_bias_; // bias rounded up to a C4 multiple (may be unused if no bias)
float *packed_output_; // C8-packed matmul output workspace
float *output_; // final NHWC output buffer
float *tmp_buffer_; // per-task matmul scratch space
const MatMulParameter *matmul_param_;
const ConvParameter *conv_param_;
} DeConvFp32Args;
#ifdef __cplusplus
extern "C" {
#endif
// Runs one task's slice (C8 output-channel groups) of the deconvolution.
int DoDeconvFp32(const float *packed_input, const float *packed_weight, const float *packed_bias, float *packed_output,
float *output, float *tmp_ori_buffer, const MatMulParameter *matmul_param,
const ConvParameter *conv_param, int task_id);
// Thread-pool entry point; cdata must point to a DeConvFp32Args.
int DeConvFp32Run(void *cdata, int task_id);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_MICRO_CODER_WRAPPER_DECONVOLUTION_FP32_WRAPPER_H_