diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake index e0c50c576c6..e8433927e81 100644 --- a/mindspore/lite/micro/cmake/file_list.cmake +++ b/mindspore/lite/micro/cmake/file_list.cmake @@ -76,6 +76,7 @@ set(CODER_OPCODERS_SRC ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc + ${MICRO_DIR}/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc @@ -281,8 +282,8 @@ endif() #### avx if("${X86_64_SIMD}" STREQUAL "avx") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2") set(AVX_SRC ${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c diff --git a/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.cc b/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.cc index 4b898da8ee1..7735f5711fe 100644 --- a/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.cc @@ -20,23 +20,80 @@ #include "nnacl/fp32/winograd_utils.h" #include "nnacl/int8/quantize.h" #include "coder/log.h" +namespace mindspore::lite::micro { -namespace { -int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) { +Conv2DBaseCoder::~Conv2DBaseCoder() { + FreeConvQuantParams(); + conv_param_ = nullptr; + conv_quant_arg_ = nullptr; + filter_tensor_ = nullptr; + bias_tensor_ = nullptr; +} + +void Conv2DBaseCoder::FreeConvQuantParams() { + if (conv_quant_arg_ == nullptr) { + return; + } + if (conv_quant_arg_->real_multiplier_ != nullptr) { + free(conv_quant_arg_->real_multiplier_); + conv_quant_arg_->real_multiplier_ = nullptr; + } + if (conv_quant_arg_->left_shift_ != nullptr) { + free(conv_quant_arg_->left_shift_); + conv_quant_arg_->left_shift_ = nullptr; + } + if (conv_quant_arg_->right_shift_ != nullptr) { + free(conv_quant_arg_->right_shift_); + conv_quant_arg_->right_shift_ = nullptr; + } + if (conv_quant_arg_->quant_multiplier_ != nullptr) { + free(conv_quant_arg_->quant_multiplier_); + conv_quant_arg_->quant_multiplier_ = nullptr; + } + if (conv_quant_arg_->out_act_min_ != nullptr) { + free(conv_quant_arg_->out_act_min_); + conv_quant_arg_->out_act_min_ = nullptr; + } + if (conv_quant_arg_->out_act_max_ != nullptr) { + free(conv_quant_arg_->out_act_max_); + conv_quant_arg_->out_act_max_ = nullptr; + } + if (conv_quant_arg_->input_quant_args_ != nullptr) { + free(conv_quant_arg_->input_quant_args_); + conv_quant_arg_->input_quant_args_ = nullptr; + } + if (conv_quant_arg_->filter_quant_args_ != nullptr) { + free(conv_quant_arg_->filter_quant_args_); + conv_quant_arg_->filter_quant_args_ = nullptr; + } + if (conv_quant_arg_->output_quant_args_ != nullptr) { + free(conv_quant_arg_->output_quant_args_); + conv_quant_arg_->output_quant_args_ = nullptr; + } +} + +int Conv2DBaseCoder::MallocConvQuantParams(size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) { MS_CHECK_TRUE(input_arg_num > 0, "invalid value of input_arg_num"); MS_CHECK_TRUE(filter_arg_num > 0, "invalid value of filter_arg_num"); MS_CHECK_TRUE(output_arg_num > 0, "invalid value of output_arg_num"); - quant_arg->input_quant_args_ = static_cast(malloc(input_arg_num * sizeof(struct QuantArg))); - MS_CHECK_PTR(quant_arg->input_quant_args_); - quant_arg->filter_quant_args_ = static_cast(malloc(filter_arg_num * sizeof(QuantArg))); - MS_CHECK_PTR(quant_arg->filter_quant_args_); - quant_arg->output_quant_args_ = static_cast(malloc(output_arg_num * sizeof(QuantArg))); - MS_CHECK_PTR(quant_arg->output_quant_args_); - return mindspore::lite::RET_OK; + conv_quant_arg_->input_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(input_arg_num * sizeof(::QuantArg))); + if (conv_quant_arg_->input_quant_args_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } + conv_quant_arg_->filter_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(filter_arg_num * sizeof(::QuantArg))); + if (conv_quant_arg_->filter_quant_args_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } + conv_quant_arg_->output_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(output_arg_num * sizeof(::QuantArg))); + if (conv_quant_arg_->output_quant_args_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } + return RET_OK; } -} // namespace -namespace mindspore::lite::micro { std::string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) { std::string ret; if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) { @@ -116,7 +173,7 @@ int Conv2DBaseCoder::MallocQuantParam() { conv_quant_arg_->input_arg_num_ = input_arg_num; conv_quant_arg_->filter_arg_num_ = filter_arg_num; conv_quant_arg_->output_arg_num_ = output_arg_num; - MallocConvQuantParams(conv_quant_arg_, input_arg_num, filter_arg_num, output_arg_num); + MallocConvQuantParams(input_arg_num, filter_arg_num, output_arg_num); return RET_OK; } @@ -125,7 +182,7 @@ int Conv2DBaseCoder::SetInputTensorQuantParam() { if (in_arg_num == kPerTensor) { QuantArg input_quant_arg = input_tensor_->quant_params().at(0); conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint; - conv_quant_arg_->input_quant_args_[0].scale_ = input_quant_arg.scale; + conv_quant_arg_->input_quant_args_[0].scale_ = static_cast(input_quant_arg.scale); return RET_OK; } else { // per channel @@ -139,12 +196,12 @@ int Conv2DBaseCoder::SetFilterTensorQuantParam() { if (weight_arg_num == kPerTensor) { QuantArg weight_quant_arg = filter_tensor_->quant_params().at(0); conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint; - conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale; + conv_quant_arg_->filter_quant_args_[0].scale_ = static_cast(weight_quant_arg.scale); } else { std::vector weight_quant_arg = filter_tensor_->quant_params(); for (int i = 0; i < static_cast(weight_arg_num); ++i) { conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint; - conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale; + conv_quant_arg_->filter_quant_args_[i].scale_ = static_cast(weight_quant_arg[i].scale); } } return RET_OK; @@ -155,7 +212,7 @@ int Conv2DBaseCoder::SetOutputTensorQuantParam() { if (out_arg_num == kPerTensor) { QuantArg output_quant_arg = output_tensor_->quant_params().at(0); conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint; - conv_quant_arg_->output_quant_args_[0].scale_ = output_quant_arg.scale; + conv_quant_arg_->output_quant_args_[0].scale_ = static_cast(output_quant_arg.scale); } else { MS_LOG(ERROR) << "Not Support Per Channel for input now."; return RET_ERROR; @@ -170,17 +227,35 @@ int Conv2DBaseCoder::SetQuantMultiplier() { weight_arg_num = conv_quant_arg_->filter_arg_num_; } conv_quant_arg_->real_multiplier_ = reinterpret_cast(malloc(weight_arg_num * sizeof(double))); - MS_CHECK_PTR(conv_quant_arg_->real_multiplier_); + if (conv_quant_arg_->real_multiplier_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } conv_quant_arg_->left_shift_ = reinterpret_cast(malloc(weight_arg_num * sizeof(int32_t))); - MS_CHECK_PTR(conv_quant_arg_->left_shift_); + if (conv_quant_arg_->left_shift_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } conv_quant_arg_->right_shift_ = reinterpret_cast(malloc(weight_arg_num * sizeof(int32_t))); - MS_CHECK_PTR(conv_quant_arg_->right_shift_); + if (conv_quant_arg_->right_shift_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } conv_quant_arg_->quant_multiplier_ = reinterpret_cast(malloc(weight_arg_num * sizeof(int32_t))); - MS_CHECK_PTR(conv_quant_arg_->quant_multiplier_); + if (conv_quant_arg_->quant_multiplier_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } conv_quant_arg_->out_act_min_ = reinterpret_cast(malloc(sizeof(int32_t))); - MS_CHECK_PTR(conv_quant_arg_->out_act_min_); + if (conv_quant_arg_->out_act_min_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } conv_quant_arg_->out_act_max_ = reinterpret_cast(malloc(sizeof(int32_t))); - MS_CHECK_PTR(conv_quant_arg_->out_act_max_); + if (conv_quant_arg_->out_act_max_ == nullptr) { + FreeConvQuantParams(); + return RET_ERROR; + } for (int i = 0; i < weight_arg_num; ++i) { const auto in_scale = static_cast(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_); @@ -197,7 +272,7 @@ int Conv2DBaseCoder::SetQuantMultiplier() { return RET_OK; } -int Conv2DBaseCoder::CheckResizeValid() const { +int Conv2DBaseCoder::CheckResizeValid() { // ===============check in channel================= // int32_t filter_in_channel = filter_tensor_->Channel(); int32_t resize_in_channel = input_tensor_->Channel(); @@ -240,10 +315,6 @@ int Conv2DBaseCoder::SetQuantParam() { MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed."); SetRoundingAndMultipilerMode(); MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed."); - // now only consider per tensor for output - MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_min_); - MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_max_); - MS_CHECK_PTR(conv_param_->conv_quant_arg_.output_quant_args_); bool relu = conv_param_->act_type_ == ActType_Relu; bool relu6 = conv_param_->act_type_ == ActType_Relu6; CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_, diff --git a/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.h b/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.h index 982b28f7a79..5b5044b469c 100644 --- a/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.h +++ b/mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.h @@ -32,24 +32,7 @@ class Conv2DBaseCoder : public OperatorCoder { const Model::Node *node, size_t node_index, Target target) : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} - ~Conv2DBaseCoder() override { - if (conv_quant_arg_ == nullptr) { - return; - } - free(conv_quant_arg_->real_multiplier_); - free(conv_quant_arg_->left_shift_); - free(conv_quant_arg_->right_shift_); - free(conv_quant_arg_->quant_multiplier_); - free(conv_quant_arg_->out_act_min_); - free(conv_quant_arg_->out_act_max_); - free(conv_quant_arg_->input_quant_args_); - free(conv_quant_arg_->filter_quant_args_); - free(conv_quant_arg_->output_quant_args_); - conv_param_ = nullptr; - conv_quant_arg_ = nullptr; - filter_tensor_ = nullptr; - bias_tensor_ = nullptr; - } + ~Conv2DBaseCoder() override; protected: virtual int Init(); @@ -68,7 +51,7 @@ class Conv2DBaseCoder : public OperatorCoder { int SetQuantMultiplier(); - int CheckResizeValid() const; + int CheckResizeValid(); int SetIfPerChannel(); @@ -80,6 +63,11 @@ class Conv2DBaseCoder : public OperatorCoder { std::string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format); + private: + int MallocConvQuantParams(size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num); + void FreeConvQuantParams(); + + protected: ConvParameter *conv_param_{nullptr}; ConvQuantArg *conv_quant_arg_{nullptr}; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc new file mode 100644 index 00000000000..95c91876181 --- /dev/null +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc @@ -0,0 +1,147 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h" +#include "src/common/version_manager.h" +#include "src/ops/populate/populate_register.h" +#include "nnacl/fp32/winograd_utils.h" +#include "coder/opcoders/nnacl/fp32/convolution_fp32_coder.h" +#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h" +#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" +using mindspore::schema::PrimitiveType_Conv2DFusion; +namespace mindspore::lite::micro::nnacl { + +int ConvDelegateCoder::Prepare(CoderContext *const context) { + // Update shape info of input and output + SetInputOutputShapeInfo(reinterpret_cast(parameter_), input_tensor_, output_tensor_); + if (conv_coder_ == nullptr) { + // need to select actual execute coder here + conv_coder_ = CPUConvolutionFP32CoderSelect(input_tensors_, output_tensors_, node_, node_index(), target_); + MS_CHECK_PTR(conv_coder_); + const void *primitive = node_->primitive_; + MS_CHECK_PTR(primitive); + int primitive_type = GetPrimitiveType(node_->primitive_); + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen parameter_gen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node_->primitive_), schema_version); + MS_CHECK_PTR(parameter_gen); + OpParameter *op_parameter = parameter_gen(node_->primitive_); + op_parameter->thread_num_ = thread_num_; + conv_coder_->set_type(primitive_type); + conv_coder_->set_thread_num(thread_num_); + conv_coder_->set_parameter(op_parameter); + } + return conv_coder_->Prepare(context); +} + +int ConvDelegateCoder::DoCode(CoderContext *const context) { return conv_coder_->DoCode(context); } + +void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output) { + conv_param->input_batch_ = input->Batch(); + conv_param->input_h_ = input->Height(); + conv_param->input_w_ = input->Width(); + conv_param->input_channel_ = input->Channel(); + conv_param->output_batch_ = output->Batch(); + conv_param->output_h_ = output->Height(); + conv_param->output_w_ = output->Width(); + conv_param->output_channel_ = output->Channel(); +} + +std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vector &in_tensors, + const std::vector &out_tensors, + const Model::Node *node, size_t node_index, + Target target) { + const void *primitive = node->primitive_; + if (primitive == nullptr) { + return nullptr; + } + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen paramGen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); + if (paramGen == nullptr) { + MS_LOG(ERROR) << "parameter generator is null"; + return nullptr; + } + auto conv_param = reinterpret_cast(paramGen(node->primitive_)); + bool use_winograd = false; + int out_unit = 0; + int kernel_h = conv_param->kernel_h_; + int kernel_w = conv_param->kernel_w_; + conv_param->input_h_ = in_tensors.at(kInputIndex)->Height(); + conv_param->input_w_ = in_tensors.at(kInputIndex)->Width(); + conv_param->input_channel_ = in_tensors.at(kInputIndex)->Channel(); + conv_param->output_h_ = out_tensors.at(kOutputIndex)->Height(); + conv_param->output_w_ = out_tensors.at(kOutputIndex)->Width(); + conv_param->output_channel_ = out_tensors.at(kOutputIndex)->Channel(); + conv_param->op_parameter_.thread_num_ = 1; + use_winograd = CheckIfUseWinograd(&out_unit, conv_param); + free(conv_param); + std::unique_ptr coder; + if (kernel_h == 1 && kernel_w == 1) { + MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); + } else if (use_winograd) { + MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder"; + coder = std::make_unique(in_tensors, out_tensors, node, node_index, target, out_unit); + } else { + MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; + coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); + } + return coder; +} + +std::unique_ptr CreateDelegateConv(const std::vector &in_tensors, + const std::vector &out_tensors, const Model::Node *node, + size_t node_index, Target target) { + return CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); +} + +std::unique_ptr CPUConvDwFp32CoderCreator(const std::vector &in_tensors, + const std::vector &out_tensors, + const Model::Node *node, size_t node_index, Target target) { + return CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); +} + +std::unique_ptr CPUConv2DFusionFP32CoderCreator(const std::vector &in_tensors, + const std::vector &out_tensors, + const Model::Node *node, size_t node_index, + Target target) { + const void *primitive = node->primitive_; + if (primitive == nullptr) { + return nullptr; + } + int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); + ParameterGen paramGen = + PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); + if (paramGen == nullptr) { + MS_LOG(ERROR) << "parameter generator is null"; + return nullptr; + } + auto conv_param = reinterpret_cast(paramGen(node->primitive_)); + std::unique_ptr coder; + if (conv_param->group_ == 1) { + coder = CreateDelegateConv(in_tensors, out_tensors, node, node_index, target); + } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { + coder = CPUConvDwFp32CoderCreator(in_tensors, out_tensors, node, node_index, target); + } else { + // GroupConv + return nullptr; + } + return coder; +} + +REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator) +} // namespace mindspore::lite::micro::nnacl diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h new file mode 100644 index 00000000000..bca09218a79 --- /dev/null +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h @@ -0,0 +1,56 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H +#define MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H +#include +#include +#include "coder/opcoders/op_coder.h" +#include "nnacl/conv_parameter.h" +namespace mindspore::lite::micro::nnacl { +class ConvDelegateCoder : public OperatorCoder { + public: + ConvDelegateCoder(const std::vector &in_tensors, const std::vector &out_tensors, + const Model::Node *node, size_t node_index, Target target) + : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} + + ~ConvDelegateCoder() override = default; + int Prepare(CoderContext *const context) override; + int DoCode(CoderContext *const context) override; + + protected: + std::unique_ptr conv_coder_ = nullptr; +}; + +void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output); +std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vector &in_tensors, + const std::vector &out_tensors, + const Model::Node *node, size_t node_index, Target target); +std::unique_ptr CreateDelegateConv(const std::vector &in_tensors, + const std::vector &out_tensors, const Model::Node *node, + size_t node_index, Target target); +std::unique_ptr CPUConvDwFp32CoderCreator(const std::vector &in_tensors, + const std::vector &out_tensors, + const Model::Node *node, size_t node_index, Target target); + +std::unique_ptr CPUConv2DFusionFP32CoderCreator(const std::vector &in_tensors, + const std::vector &out_tensors, + const Model::Node *node, size_t node_index, + Target target); + +} // namespace mindspore::lite::micro::nnacl + +#endif // MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc index 59e3716aa87..8a4c6dcc37e 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc @@ -32,17 +32,14 @@ using mindspore::schema::PrimitiveType_Conv2DFusion; namespace mindspore::lite::micro::nnacl { int ConvolutionFP32Coder::InitTmpBuffer() { int in_channel = conv_param_->input_channel_; - int uint_size; - if (target_ == kARM32A) { - uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C4NUM * thread_num_; - } else { - uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_num_; - } + int uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_num_; packed_input_size_ = uint_size * sizeof(float); packed_input_ = reinterpret_cast(allocator_->Malloc(kNumberTypeFloat32, packed_input_size_, kWorkspace)); + MS_CHECK_PTR(packed_input_); col_major_input_size_ = uint_size * sizeof(float); col_major_input_ = reinterpret_cast(allocator_->Malloc(kNumberTypeFloat32, col_major_input_size_, kWorkspace)); + MS_CHECK_PTR(col_major_input_); return RET_OK; } @@ -68,12 +65,13 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) { conv_param_->input_channel_ = in_channel; conv_param_->output_channel_ = out_channel; int kernel_plane = kernel_h * kernel_w; - const int oc_block = C8NUM; - int oc_block_num = UP_DIV(out_channel, C8NUM); - int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane; + int oc_block = C8NUM; + if (target_ == kARM32A) { + oc_block = C4NUM; + } + int oc_block_num = UP_ROUND(out_channel, oc_block); + int pack_weight_size = oc_block_num * in_channel * kernel_plane; pack_weight_size_ = pack_weight_size * sizeof(float); - auto origin_weight = reinterpret_cast(filter_tensor_->MutableData()); - MS_CHECK_PTR(origin_weight); packed_weight_ = reinterpret_cast(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); MS_CHECK_PTR(packed_weight_); auto out_channel_size = static_cast(out_channel); @@ -88,10 +86,15 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) { } init_code.CodeMallocExpression(packed_weight_, pack_weight_size_); init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_); - init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size, - in_channel * kernel_plane); + if (target_ == kARM32A) { + init_code.CodeFunction("RowMajor2Col4Major", init_weight_str, packed_weight_, out_channel_size, + in_channel * kernel_plane); + } else { + init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size, + in_channel * kernel_plane); + } - auto bias_data_size = static_cast(oc_block_num * oc_block * sizeof(float)); + auto bias_data_size = static_cast(oc_block_num * sizeof(float)); bias_data_ = reinterpret_cast(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); MS_CHECK_PTR(bias_data_); if (input_tensors_.size() == kInputSize2) { @@ -140,78 +143,4 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) { context->AppendCode(code.str()); return RET_OK; } - -std::unique_ptr CPUConvolutionFP32CoderCreator(const std::vector &in_tensors, - const std::vector &out_tensors, - const Model::Node *node, size_t node_index, - Target target) { - std::vector inputs = in_tensors; - std::vector outputs = out_tensors; - const void *primitive = node->primitive_; - if (primitive == nullptr) { - return nullptr; - } - int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); - ParameterGen paramGen = - PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); - if (paramGen == nullptr) { - MS_LOG(ERROR) << "parameter generator is null"; - return nullptr; - } - auto conv_param = reinterpret_cast(paramGen(node->primitive_)); - int kernel_h = conv_param->kernel_h_; - int kernel_w = conv_param->kernel_w_; - conv_param->input_h_ = inputs.at(kInputIndex)->Height(); - conv_param->input_w_ = inputs.at(kInputIndex)->Width(); - conv_param->input_channel_ = inputs.at(kInputIndex)->Channel(); - conv_param->output_h_ = outputs.at(kOutputIndex)->Height(); - conv_param->output_w_ = outputs.at(kOutputIndex)->Width(); - conv_param->output_channel_ = outputs.at(kOutputIndex)->Channel(); - conv_param->op_parameter_.thread_num_ = 1; - int out_unit = 0; - bool use_winograd = CheckIfUseWinograd(&out_unit, conv_param); - free(conv_param); - // weight de quant - std::unique_ptr coder; - if (kernel_h == 1 && kernel_w == 1) { - MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); - } else if (use_winograd) { - MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder"; - coder = std::make_unique(in_tensors, out_tensors, node, node_index, target, out_unit); - } else { - MS_LOG(DEBUG) << "create ConvolutionFP32Coder"; - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); - } - return coder; -} - -std::unique_ptr CPUConv2DFusionFP32CoderCreator(const std::vector &in_tensors, - const std::vector &out_tensors, - const Model::Node *node, size_t node_index, - Target target) { - const void *primitive = node->primitive_; - if (primitive == nullptr) { - return nullptr; - } - int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); - ParameterGen paramGen = - PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); - if (paramGen == nullptr) { - MS_LOG(ERROR) << "parameter generator is null"; - return nullptr; - } - auto conv_param = reinterpret_cast(paramGen(node->primitive_)); - std::unique_ptr coder; - if (conv_param->group_ == 1) { - coder = CPUConvolutionFP32CoderCreator(in_tensors, out_tensors, node, node_index, target); - } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { - coder = CPUOpCoderCreator(in_tensors, out_tensors, node, node_index, target); - } else { - // GroupConv - } - return coder; -} - -REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator) } // namespace mindspore::lite::micro::nnacl diff --git a/mindspore/lite/micro/coder/opcoders/op_coder.cc b/mindspore/lite/micro/coder/opcoders/op_coder.cc index 1c956788e7b..202c8059ecf 100644 --- a/mindspore/lite/micro/coder/opcoders/op_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/op_coder.cc @@ -25,6 +25,7 @@ OperatorCoder::~OperatorCoder() { if (parameter_ != nullptr) { free(parameter_); } + parameter_ = nullptr; } const std::vector OperatorCoder::input_tensors() const { return input_tensors_; }