forked from mindspore-Ecosystem/mindspore
fix mobilenetv2 model for micro
This commit is contained in:
parent
958ec1c85b
commit
73bce8e237
|
@ -76,6 +76,7 @@ set(CODER_OPCODERS_SRC
|
|||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
|
||||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc
|
||||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc
|
||||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc
|
||||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc
|
||||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
|
||||
${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc
|
||||
|
@ -281,8 +282,8 @@ endif()
|
|||
|
||||
#### avx
|
||||
if("${X86_64_SIMD}" STREQUAL "avx")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2")
|
||||
set(AVX_SRC
|
||||
${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c
|
||||
|
|
|
@ -20,23 +20,80 @@
|
|||
#include "nnacl/fp32/winograd_utils.h"
|
||||
#include "nnacl/int8/quantize.h"
|
||||
#include "coder/log.h"
|
||||
namespace mindspore::lite::micro {
|
||||
|
||||
namespace {
|
||||
int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) {
|
||||
// Frees the quantization buffers reachable through conv_quant_arg_, then clears
// the raw pointers cached on this coder.
Conv2DBaseCoder::~Conv2DBaseCoder() {
  // Has to run while conv_quant_arg_ is still set: the buffers are freed through it.
  FreeConvQuantParams();
  bias_tensor_ = nullptr;
  filter_tensor_ = nullptr;
  conv_quant_arg_ = nullptr;
  conv_param_ = nullptr;
}
|
||||
|
||||
void Conv2DBaseCoder::FreeConvQuantParams() {
|
||||
if (conv_quant_arg_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
if (conv_quant_arg_->real_multiplier_ != nullptr) {
|
||||
free(conv_quant_arg_->real_multiplier_);
|
||||
conv_quant_arg_->real_multiplier_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->left_shift_ != nullptr) {
|
||||
free(conv_quant_arg_->left_shift_);
|
||||
conv_quant_arg_->left_shift_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->right_shift_ != nullptr) {
|
||||
free(conv_quant_arg_->right_shift_);
|
||||
conv_quant_arg_->right_shift_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->quant_multiplier_ != nullptr) {
|
||||
free(conv_quant_arg_->quant_multiplier_);
|
||||
conv_quant_arg_->quant_multiplier_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->out_act_min_ != nullptr) {
|
||||
free(conv_quant_arg_->out_act_min_);
|
||||
conv_quant_arg_->out_act_min_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->out_act_max_ != nullptr) {
|
||||
free(conv_quant_arg_->out_act_max_);
|
||||
conv_quant_arg_->out_act_max_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->input_quant_args_ != nullptr) {
|
||||
free(conv_quant_arg_->input_quant_args_);
|
||||
conv_quant_arg_->input_quant_args_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->filter_quant_args_ != nullptr) {
|
||||
free(conv_quant_arg_->filter_quant_args_);
|
||||
conv_quant_arg_->filter_quant_args_ = nullptr;
|
||||
}
|
||||
if (conv_quant_arg_->output_quant_args_ != nullptr) {
|
||||
free(conv_quant_arg_->output_quant_args_);
|
||||
conv_quant_arg_->output_quant_args_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
int Conv2DBaseCoder::MallocConvQuantParams(size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) {
|
||||
MS_CHECK_TRUE(input_arg_num > 0, "invalid value of input_arg_num");
|
||||
MS_CHECK_TRUE(filter_arg_num > 0, "invalid value of filter_arg_num");
|
||||
MS_CHECK_TRUE(output_arg_num > 0, "invalid value of output_arg_num");
|
||||
quant_arg->input_quant_args_ = static_cast<QuantArg *>(malloc(input_arg_num * sizeof(struct QuantArg)));
|
||||
MS_CHECK_PTR(quant_arg->input_quant_args_);
|
||||
quant_arg->filter_quant_args_ = static_cast<QuantArg *>(malloc(filter_arg_num * sizeof(QuantArg)));
|
||||
MS_CHECK_PTR(quant_arg->filter_quant_args_);
|
||||
quant_arg->output_quant_args_ = static_cast<QuantArg *>(malloc(output_arg_num * sizeof(QuantArg)));
|
||||
MS_CHECK_PTR(quant_arg->output_quant_args_);
|
||||
return mindspore::lite::RET_OK;
|
||||
conv_quant_arg_->input_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(input_arg_num * sizeof(::QuantArg)));
|
||||
if (conv_quant_arg_->input_quant_args_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->filter_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(filter_arg_num * sizeof(::QuantArg)));
|
||||
if (conv_quant_arg_->filter_quant_args_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->output_quant_args_ = reinterpret_cast<::QuantArg *>(malloc(output_arg_num * sizeof(::QuantArg)));
|
||||
if (conv_quant_arg_->output_quant_args_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace mindspore::lite::micro {
|
||||
std::string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) {
|
||||
std::string ret;
|
||||
if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) {
|
||||
|
@ -116,7 +173,7 @@ int Conv2DBaseCoder::MallocQuantParam() {
|
|||
conv_quant_arg_->input_arg_num_ = input_arg_num;
|
||||
conv_quant_arg_->filter_arg_num_ = filter_arg_num;
|
||||
conv_quant_arg_->output_arg_num_ = output_arg_num;
|
||||
MallocConvQuantParams(conv_quant_arg_, input_arg_num, filter_arg_num, output_arg_num);
|
||||
MallocConvQuantParams(input_arg_num, filter_arg_num, output_arg_num);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -125,7 +182,7 @@ int Conv2DBaseCoder::SetInputTensorQuantParam() {
|
|||
if (in_arg_num == kPerTensor) {
|
||||
QuantArg input_quant_arg = input_tensor_->quant_params().at(0);
|
||||
conv_quant_arg_->input_quant_args_[0].zp_ = input_quant_arg.zeroPoint;
|
||||
conv_quant_arg_->input_quant_args_[0].scale_ = input_quant_arg.scale;
|
||||
conv_quant_arg_->input_quant_args_[0].scale_ = static_cast<float>(input_quant_arg.scale);
|
||||
return RET_OK;
|
||||
} else {
|
||||
// per channel
|
||||
|
@ -139,12 +196,12 @@ int Conv2DBaseCoder::SetFilterTensorQuantParam() {
|
|||
if (weight_arg_num == kPerTensor) {
|
||||
QuantArg weight_quant_arg = filter_tensor_->quant_params().at(0);
|
||||
conv_quant_arg_->filter_quant_args_[0].zp_ = weight_quant_arg.zeroPoint;
|
||||
conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale;
|
||||
conv_quant_arg_->filter_quant_args_[0].scale_ = static_cast<float>(weight_quant_arg.scale);
|
||||
} else {
|
||||
std::vector<QuantArg> weight_quant_arg = filter_tensor_->quant_params();
|
||||
for (int i = 0; i < static_cast<int>(weight_arg_num); ++i) {
|
||||
conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint;
|
||||
conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale;
|
||||
conv_quant_arg_->filter_quant_args_[i].scale_ = static_cast<float>(weight_quant_arg[i].scale);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
|
@ -155,7 +212,7 @@ int Conv2DBaseCoder::SetOutputTensorQuantParam() {
|
|||
if (out_arg_num == kPerTensor) {
|
||||
QuantArg output_quant_arg = output_tensor_->quant_params().at(0);
|
||||
conv_quant_arg_->output_quant_args_[0].zp_ = output_quant_arg.zeroPoint;
|
||||
conv_quant_arg_->output_quant_args_[0].scale_ = output_quant_arg.scale;
|
||||
conv_quant_arg_->output_quant_args_[0].scale_ = static_cast<float>(output_quant_arg.scale);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Not Support Per Channel for input now.";
|
||||
return RET_ERROR;
|
||||
|
@ -170,17 +227,35 @@ int Conv2DBaseCoder::SetQuantMultiplier() {
|
|||
weight_arg_num = conv_quant_arg_->filter_arg_num_;
|
||||
}
|
||||
conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(weight_arg_num * sizeof(double)));
|
||||
MS_CHECK_PTR(conv_quant_arg_->real_multiplier_);
|
||||
if (conv_quant_arg_->real_multiplier_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
|
||||
MS_CHECK_PTR(conv_quant_arg_->left_shift_);
|
||||
if (conv_quant_arg_->left_shift_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
|
||||
MS_CHECK_PTR(conv_quant_arg_->right_shift_);
|
||||
if (conv_quant_arg_->right_shift_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(weight_arg_num * sizeof(int32_t)));
|
||||
MS_CHECK_PTR(conv_quant_arg_->quant_multiplier_);
|
||||
if (conv_quant_arg_->quant_multiplier_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
|
||||
MS_CHECK_PTR(conv_quant_arg_->out_act_min_);
|
||||
if (conv_quant_arg_->out_act_min_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
|
||||
MS_CHECK_PTR(conv_quant_arg_->out_act_max_);
|
||||
if (conv_quant_arg_->out_act_max_ == nullptr) {
|
||||
FreeConvQuantParams();
|
||||
return RET_ERROR;
|
||||
}
|
||||
for (int i = 0; i < weight_arg_num; ++i) {
|
||||
const auto in_scale =
|
||||
static_cast<double>(conv_quant_arg_->input_quant_args_[0].scale_ * conv_quant_arg_->filter_quant_args_[i].scale_);
|
||||
|
@ -197,7 +272,7 @@ int Conv2DBaseCoder::SetQuantMultiplier() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Conv2DBaseCoder::CheckResizeValid() const {
|
||||
int Conv2DBaseCoder::CheckResizeValid() {
|
||||
// ===============check in channel================= //
|
||||
int32_t filter_in_channel = filter_tensor_->Channel();
|
||||
int32_t resize_in_channel = input_tensor_->Channel();
|
||||
|
@ -240,10 +315,6 @@ int Conv2DBaseCoder::SetQuantParam() {
|
|||
MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed.");
|
||||
SetRoundingAndMultipilerMode();
|
||||
MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed.");
|
||||
// now only consider per tensor for output
|
||||
MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_min_);
|
||||
MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_max_);
|
||||
MS_CHECK_PTR(conv_param_->conv_quant_arg_.output_quant_args_);
|
||||
bool relu = conv_param_->act_type_ == ActType_Relu;
|
||||
bool relu6 = conv_param_->act_type_ == ActType_Relu6;
|
||||
CalculateActivationRangeQuantized(relu, relu6, conv_param_->conv_quant_arg_.output_quant_args_[0].zp_,
|
||||
|
|
|
@ -32,24 +32,7 @@ class Conv2DBaseCoder : public OperatorCoder {
|
|||
const Model::Node *node, size_t node_index, Target target)
|
||||
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
|
||||
|
||||
~Conv2DBaseCoder() override {
|
||||
if (conv_quant_arg_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
free(conv_quant_arg_->real_multiplier_);
|
||||
free(conv_quant_arg_->left_shift_);
|
||||
free(conv_quant_arg_->right_shift_);
|
||||
free(conv_quant_arg_->quant_multiplier_);
|
||||
free(conv_quant_arg_->out_act_min_);
|
||||
free(conv_quant_arg_->out_act_max_);
|
||||
free(conv_quant_arg_->input_quant_args_);
|
||||
free(conv_quant_arg_->filter_quant_args_);
|
||||
free(conv_quant_arg_->output_quant_args_);
|
||||
conv_param_ = nullptr;
|
||||
conv_quant_arg_ = nullptr;
|
||||
filter_tensor_ = nullptr;
|
||||
bias_tensor_ = nullptr;
|
||||
}
|
||||
~Conv2DBaseCoder() override;
|
||||
|
||||
protected:
|
||||
virtual int Init();
|
||||
|
@ -68,7 +51,7 @@ class Conv2DBaseCoder : public OperatorCoder {
|
|||
|
||||
int SetQuantMultiplier();
|
||||
|
||||
int CheckResizeValid() const;
|
||||
int CheckResizeValid();
|
||||
|
||||
int SetIfPerChannel();
|
||||
|
||||
|
@ -80,6 +63,11 @@ class Conv2DBaseCoder : public OperatorCoder {
|
|||
|
||||
std::string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format);
|
||||
|
||||
private:
|
||||
int MallocConvQuantParams(size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num);
|
||||
void FreeConvQuantParams();
|
||||
|
||||
protected:
|
||||
ConvParameter *conv_param_{nullptr};
|
||||
|
||||
ConvQuantArg *conv_quant_arg_{nullptr};
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.h"
|
||||
#include "src/common/version_manager.h"
|
||||
#include "src/ops/populate/populate_register.h"
|
||||
#include "nnacl/fp32/winograd_utils.h"
|
||||
#include "coder/opcoders/nnacl/fp32/convolution_fp32_coder.h"
|
||||
#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
|
||||
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
|
||||
using mindspore::schema::PrimitiveType_Conv2DFusion;
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
|
||||
// Refreshes the shape fields of this coder's ConvParameter from the input/output
// tensors, lazily creates and configures the concrete convolution coder on the
// first call, and then forwards Prepare() to it.
//
// Returns the delegate's Prepare() result, or the MS_CHECK_PTR error code when a
// required pointer is null.
int ConvDelegateCoder::Prepare(CoderContext *const context) {
  // SetInputOutputShapeInfo dereferences all three pointers unconditionally.
  MS_CHECK_PTR(parameter_);
  MS_CHECK_PTR(input_tensor_);
  MS_CHECK_PTR(output_tensor_);
  // Update shape info of input and output
  SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(parameter_), input_tensor_, output_tensor_);
  if (conv_coder_ == nullptr) {
    MS_CHECK_PTR(node_);
    const void *primitive = node_->primitive_;
    MS_CHECK_PTR(primitive);
    // need to select actual execute coder here
    // Keep the new coder in a local until it is fully configured: if any step below
    // fails, conv_coder_ stays null and a later Prepare() starts from scratch instead
    // of reusing a coder that has no parameter attached.
    auto coder = CPUConvolutionFP32CoderSelect(input_tensors_, output_tensors_, node_, node_index(), target_);
    MS_CHECK_PTR(coder);
    const int primitive_type = GetPrimitiveType(node_->primitive_);
    const int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
    ParameterGen parameter_gen = PopulateRegistry::GetInstance()->GetParameterCreator(primitive_type, schema_version);
    MS_CHECK_PTR(parameter_gen);
    OpParameter *op_parameter = parameter_gen(node_->primitive_);
    // The generator can fail; the original wrote thread_num_ through a null pointer here.
    MS_CHECK_PTR(op_parameter);
    op_parameter->thread_num_ = thread_num_;
    coder->set_type(primitive_type);
    coder->set_thread_num(thread_num_);
    coder->set_parameter(op_parameter);
    conv_coder_.swap(coder);
  }
  return conv_coder_->Prepare(context);
}
|
||||
|
||||
// Forwards code generation to the concrete convolution coder chosen in Prepare().
int ConvDelegateCoder::DoCode(CoderContext *const context) {
  // conv_coder_ is only created inside Prepare(); fail cleanly instead of
  // dereferencing null when Prepare() was skipped or did not succeed.
  MS_CHECK_PTR(conv_coder_);
  return conv_coder_->DoCode(context);
}
|
||||
|
||||
// Copies batch, height, width and channel of the input and output tensors into the
// matching fields of conv_param. None of the three pointers is checked here; the
// caller must pass non-null values.
void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output) {
  // batch
  conv_param->input_batch_ = input->Batch();
  conv_param->output_batch_ = output->Batch();
  // spatial size
  conv_param->input_h_ = input->Height();
  conv_param->output_h_ = output->Height();
  conv_param->input_w_ = input->Width();
  conv_param->output_w_ = output->Width();
  // channels
  conv_param->input_channel_ = input->Channel();
  conv_param->output_channel_ = output->Channel();
}
|
||||
|
||||
// Chooses the concrete fp32 convolution coder for a node.
//
// A temporary ConvParameter is generated from the node's primitive, filled with the
// spatial/channel sizes of the first input and output tensors, and passed to
// CheckIfUseWinograd(). A ConvolutionWinogradFP32Coder is returned when the kernel
// is not 1x1 and winograd is usable; otherwise a ConvolutionFP32Coder is returned.
//
// Returns nullptr when the node, its primitive, the parameter generator or the
// generated parameter is null.
std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderSelect(const std::vector<Tensor *> &in_tensors,
                                                             const std::vector<Tensor *> &out_tensors,
                                                             const Model::Node *node, size_t node_index,
                                                             Target target) {
  if (node == nullptr || node->primitive_ == nullptr) {
    return nullptr;
  }
  const int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
  ParameterGen paramGen =
    PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
  if (paramGen == nullptr) {
    MS_LOG(ERROR) << "parameter generator is null";
    return nullptr;
  }
  auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
  if (conv_param == nullptr) {
    // The generator can fail; the original dereferenced the result unconditionally.
    MS_LOG(ERROR) << "conv parameter is null";
    return nullptr;
  }
  // Read everything needed from conv_param before it is released below.
  const bool is_1x1_kernel = conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1;
  conv_param->input_h_ = in_tensors.at(kInputIndex)->Height();
  conv_param->input_w_ = in_tensors.at(kInputIndex)->Width();
  conv_param->input_channel_ = in_tensors.at(kInputIndex)->Channel();
  conv_param->output_h_ = out_tensors.at(kOutputIndex)->Height();
  conv_param->output_w_ = out_tensors.at(kOutputIndex)->Width();
  conv_param->output_channel_ = out_tensors.at(kOutputIndex)->Channel();
  conv_param->op_parameter_.thread_num_ = 1;
  int out_unit = 0;
  const bool use_winograd = CheckIfUseWinograd(&out_unit, conv_param);
  // The parameter was only needed for the decision above.
  free(conv_param);

  if (!is_1x1_kernel && use_winograd) {
    MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder";
    return std::make_unique<ConvolutionWinogradFP32Coder>(in_tensors, out_tensors, node, node_index, target, out_unit);
  }
  // 1x1 kernels and every non-winograd case share the plain convolution coder.
  MS_LOG(DEBUG) << "create ConvolutionFP32Coder";
  return CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target);
}
|
||||
|
||||
// Creates a ConvDelegateCoder for the node; that coder picks the concrete
// convolution coder later, inside its own Prepare().
std::unique_ptr<OperatorCoder> CreateDelegateConv(const std::vector<Tensor *> &in_tensors,
                                                  const std::vector<Tensor *> &out_tensors, const Model::Node *node,
                                                  size_t node_index, Target target) {
  auto delegate_coder = CPUOpCoderCreator<ConvDelegateCoder>(in_tensors, out_tensors, node, node_index, target);
  return delegate_coder;
}
|
||||
|
||||
// Creates the fp32 depthwise convolution coder for the node.
std::unique_ptr<OperatorCoder> CPUConvDwFp32CoderCreator(const std::vector<Tensor *> &in_tensors,
                                                         const std::vector<Tensor *> &out_tensors,
                                                         const Model::Node *node, size_t node_index, Target target) {
  auto depthwise_coder =
    CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>(in_tensors, out_tensors, node, node_index, target);
  return depthwise_coder;
}
|
||||
|
||||
// Entry point registered for fp32 Conv2DFusion: dispatches on the convolution's
// group count.
//   group == 1                                 -> delegate coder (CreateDelegateConv)
//   group == input_channel == output_channel   -> depthwise coder
//   anything else (grouped convolution)        -> nullptr
//
// Also returns nullptr when the node, its primitive, the parameter generator or the
// generated parameter is null.
std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors,
                                                               const std::vector<Tensor *> &out_tensors,
                                                               const Model::Node *node, size_t node_index,
                                                               Target target) {
  if (node == nullptr || node->primitive_ == nullptr) {
    return nullptr;
  }
  const int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
  ParameterGen paramGen =
    PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
  if (paramGen == nullptr) {
    MS_LOG(ERROR) << "parameter generator is null";
    return nullptr;
  }
  auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
  if (conv_param == nullptr) {
    // The generator can fail; the original dereferenced the result unconditionally.
    MS_LOG(ERROR) << "conv parameter is null";
    return nullptr;
  }
  // Capture the decision inputs, then release the temporary parameter. The original
  // never freed it and so leaked one ConvParameter per call; free() matches how
  // CPUConvolutionFP32CoderSelect releases the same kind of object.
  const int group = conv_param->group_;
  const bool is_depthwise = group == conv_param->input_channel_ && group == conv_param->output_channel_;
  free(conv_param);

  if (group == 1) {
    return CreateDelegateConv(in_tensors, out_tensors, node, node_index, target);
  }
  if (is_depthwise) {
    return CPUConvDwFp32CoderCreator(in_tensors, out_tensors, node, node_index, target);
  }
  // GroupConv
  return nullptr;
}
|
||||
|
||||
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator)
|
||||
} // namespace mindspore::lite::micro::nnacl
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H
|
||||
#define MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "coder/opcoders/op_coder.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
class ConvDelegateCoder : public OperatorCoder {
|
||||
public:
|
||||
ConvDelegateCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index, Target target)
|
||||
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
|
||||
|
||||
~ConvDelegateCoder() override = default;
|
||||
int Prepare(CoderContext *const context) override;
|
||||
int DoCode(CoderContext *const context) override;
|
||||
|
||||
protected:
|
||||
std::unique_ptr<OperatorCoder> conv_coder_ = nullptr;
|
||||
};
|
||||
|
||||
void SetInputOutputShapeInfo(ConvParameter *conv_param, const lite::Tensor *input, const lite::Tensor *output);
|
||||
std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderSelect(const std::vector<Tensor *> &in_tensors,
|
||||
const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index, Target target);
|
||||
std::unique_ptr<OperatorCoder> CreateDelegateConv(const std::vector<Tensor *> &in_tensors,
|
||||
const std::vector<Tensor *> &out_tensors, const Model::Node *node,
|
||||
size_t node_index, Target target);
|
||||
std::unique_ptr<OperatorCoder> CPUConvDwFp32CoderCreator(const std::vector<Tensor *> &in_tensors,
|
||||
const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index, Target target);
|
||||
|
||||
std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors,
|
||||
const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index,
|
||||
Target target);
|
||||
|
||||
} // namespace mindspore::lite::micro::nnacl
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_OPCODERS_NNACL_FP32_CONV2D_DELEGATE_FP32_CODER_H
|
|
@ -32,17 +32,14 @@ using mindspore::schema::PrimitiveType_Conv2DFusion;
|
|||
namespace mindspore::lite::micro::nnacl {
|
||||
int ConvolutionFP32Coder::InitTmpBuffer() {
|
||||
int in_channel = conv_param_->input_channel_;
|
||||
int uint_size;
|
||||
if (target_ == kARM32A) {
|
||||
uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C4NUM * thread_num_;
|
||||
} else {
|
||||
uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_num_;
|
||||
}
|
||||
int uint_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * in_channel * C12NUM * thread_num_;
|
||||
packed_input_size_ = uint_size * sizeof(float);
|
||||
packed_input_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, packed_input_size_, kWorkspace));
|
||||
MS_CHECK_PTR(packed_input_);
|
||||
col_major_input_size_ = uint_size * sizeof(float);
|
||||
col_major_input_ =
|
||||
reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, col_major_input_size_, kWorkspace));
|
||||
MS_CHECK_PTR(col_major_input_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -68,12 +65,13 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
|
|||
conv_param_->input_channel_ = in_channel;
|
||||
conv_param_->output_channel_ = out_channel;
|
||||
int kernel_plane = kernel_h * kernel_w;
|
||||
const int oc_block = C8NUM;
|
||||
int oc_block_num = UP_DIV(out_channel, C8NUM);
|
||||
int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane;
|
||||
int oc_block = C8NUM;
|
||||
if (target_ == kARM32A) {
|
||||
oc_block = C4NUM;
|
||||
}
|
||||
int oc_block_num = UP_ROUND(out_channel, oc_block);
|
||||
int pack_weight_size = oc_block_num * in_channel * kernel_plane;
|
||||
pack_weight_size_ = pack_weight_size * sizeof(float);
|
||||
auto origin_weight = reinterpret_cast<float *>(filter_tensor_->MutableData());
|
||||
MS_CHECK_PTR(origin_weight);
|
||||
packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
|
||||
MS_CHECK_PTR(packed_weight_);
|
||||
auto out_channel_size = static_cast<size_t>(out_channel);
|
||||
|
@ -88,10 +86,15 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
|
|||
}
|
||||
init_code.CodeMallocExpression(packed_weight_, pack_weight_size_);
|
||||
init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_);
|
||||
init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size,
|
||||
in_channel * kernel_plane);
|
||||
if (target_ == kARM32A) {
|
||||
init_code.CodeFunction("RowMajor2Col4Major", init_weight_str, packed_weight_, out_channel_size,
|
||||
in_channel * kernel_plane);
|
||||
} else {
|
||||
init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size,
|
||||
in_channel * kernel_plane);
|
||||
}
|
||||
|
||||
auto bias_data_size = static_cast<size_t>(oc_block_num * oc_block * sizeof(float));
|
||||
auto bias_data_size = static_cast<size_t>(oc_block_num * sizeof(float));
|
||||
bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
|
||||
MS_CHECK_PTR(bias_data_);
|
||||
if (input_tensors_.size() == kInputSize2) {
|
||||
|
@ -140,78 +143,4 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) {
|
|||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector<Tensor *> &in_tensors,
|
||||
const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index,
|
||||
Target target) {
|
||||
std::vector<Tensor *> inputs = in_tensors;
|
||||
std::vector<Tensor *> outputs = out_tensors;
|
||||
const void *primitive = node->primitive_;
|
||||
if (primitive == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
|
||||
ParameterGen paramGen =
|
||||
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
|
||||
if (paramGen == nullptr) {
|
||||
MS_LOG(ERROR) << "parameter generator is null";
|
||||
return nullptr;
|
||||
}
|
||||
auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
|
||||
int kernel_h = conv_param->kernel_h_;
|
||||
int kernel_w = conv_param->kernel_w_;
|
||||
conv_param->input_h_ = inputs.at(kInputIndex)->Height();
|
||||
conv_param->input_w_ = inputs.at(kInputIndex)->Width();
|
||||
conv_param->input_channel_ = inputs.at(kInputIndex)->Channel();
|
||||
conv_param->output_h_ = outputs.at(kOutputIndex)->Height();
|
||||
conv_param->output_w_ = outputs.at(kOutputIndex)->Width();
|
||||
conv_param->output_channel_ = outputs.at(kOutputIndex)->Channel();
|
||||
conv_param->op_parameter_.thread_num_ = 1;
|
||||
int out_unit = 0;
|
||||
bool use_winograd = CheckIfUseWinograd(&out_unit, conv_param);
|
||||
free(conv_param);
|
||||
// weight de quant
|
||||
std::unique_ptr<OperatorCoder> coder;
|
||||
if (kernel_h == 1 && kernel_w == 1) {
|
||||
MS_LOG(DEBUG) << "create ConvolutionFP32Coder";
|
||||
coder = CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target);
|
||||
} else if (use_winograd) {
|
||||
MS_LOG(DEBUG) << "create Conv2DWinogradFP32Coder";
|
||||
coder = std::make_unique<ConvolutionWinogradFP32Coder>(in_tensors, out_tensors, node, node_index, target, out_unit);
|
||||
} else {
|
||||
MS_LOG(DEBUG) << "create ConvolutionFP32Coder";
|
||||
coder = CPUOpCoderCreator<ConvolutionFP32Coder>(in_tensors, out_tensors, node, node_index, target);
|
||||
}
|
||||
return coder;
|
||||
}
|
||||
|
||||
std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors,
|
||||
const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index,
|
||||
Target target) {
|
||||
const void *primitive = node->primitive_;
|
||||
if (primitive == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
|
||||
ParameterGen paramGen =
|
||||
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
|
||||
if (paramGen == nullptr) {
|
||||
MS_LOG(ERROR) << "parameter generator is null";
|
||||
return nullptr;
|
||||
}
|
||||
auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
|
||||
std::unique_ptr<OperatorCoder> coder;
|
||||
if (conv_param->group_ == 1) {
|
||||
coder = CPUConvolutionFP32CoderCreator(in_tensors, out_tensors, node, node_index, target);
|
||||
} else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
|
||||
coder = CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>(in_tensors, out_tensors, node, node_index, target);
|
||||
} else {
|
||||
// GroupConv
|
||||
}
|
||||
return coder;
|
||||
}
|
||||
|
||||
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator)
|
||||
} // namespace mindspore::lite::micro::nnacl
|
||||
|
|
|
@ -25,6 +25,7 @@ OperatorCoder::~OperatorCoder() {
|
|||
if (parameter_ != nullptr) {
|
||||
free(parameter_);
|
||||
}
|
||||
parameter_ = nullptr;
|
||||
}
|
||||
|
||||
const std::vector<Tensor *> OperatorCoder::input_tensors() const { return input_tensors_; }
|
||||
|
|
Loading…
Reference in New Issue