!24466 【MS】【LITE】optimize code

Merge pull request !24466 from chenjianping/code_check
This commit is contained in:
i-robot 2021-09-30 06:17:40 +00:00 committed by Gitee
commit 1fd8f840fa
10 changed files with 58 additions and 21 deletions

View File

@@ -20,26 +20,27 @@
#include "nnacl/op_base.h"
#include "nnacl/common_func.h"
#include "nnacl/nnacl_utils.h"
// Maximum tensor rank supported by the arithmetic (element-wise) kernels.
#define ARITHMETIC_SUPPORT_DIMS_NUM 10

// Parameter block shared by the arithmetic (element-wise / broadcast)
// operators. All per-dimension arrays are sized by
// ARITHMETIC_SUPPORT_DIMS_NUM; only the first ndim_ entries are meaningful.
typedef struct ArithmeticParameter {
  OpParameter op_parameter_;  // common operator header (must stay first)
  bool broadcasting_;         // true when the two input shapes differ and broadcasting is needed
  size_t ndim_;               // number of dimensions in use — assumed <= ARITHMETIC_SUPPORT_DIMS_NUM; TODO confirm callers enforce this
  int activation_type_;       // fused activation applied after the arithmetic op
  int in_shape0_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int in_elements_num0_;
  int in_shape1_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int in_elements_num1_;
  int out_shape_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int out_elements_num_;
  int in_strides0_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int in_strides1_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int out_strides_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int multiples0_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int multiples1_[ARITHMETIC_SUPPORT_DIMS_NUM];
  int eltwise_mode_;  // eltwise need
} ArithmeticParameter;

View File

@ -56,8 +56,11 @@ int AddNFp16CPUKernel::AddNParallelRun(int thread_id, float lhs_scale, float rhs
int AddNFp16CPUKernel::Run() {
elements_num_ = out_tensors_[0]->ElementsNum();
auto input0_data = reinterpret_cast<float16_t *>(in_tensors_[0]->MutableData());
CHECK_NULL_RETURN(input0_data);
auto input1_data = reinterpret_cast<float16_t *>(in_tensors_[1]->MutableData());
CHECK_NULL_RETURN(input1_data);
auto out_data = reinterpret_cast<float16_t *>(out_tensors_[0]->MutableData());
CHECK_NULL_RETURN(out_data);
if (static_cast<int>(elements_num_) < op_parameter_->thread_num_) {
if (in_tensors_[0]->shape() == in_tensors_[1]->shape()) {
ElementAddFp16(input0_data, input1_data, out_data, elements_num_);
@ -71,6 +74,7 @@ int AddNFp16CPUKernel::Run() {
}
for (size_t i = 2; i < in_tensors_.size(); ++i) {
CHECK_NULL_RETURN(in_tensors_[i]->data());
if (in_tensors_[i]->shape() == out_tensors_[0]->shape()) {
ElementAddFp16(reinterpret_cast<float16_t *>(in_tensors_[i]->data()), out_data, out_data, elements_num_);
} else {
@ -95,6 +99,7 @@ int AddNFp16CPUKernel::Run() {
}
for (size_t i = 2; i < in_tensors_.size(); ++i) {
in1_addr_ = reinterpret_cast<float16_t *>(in_tensors_[i]->MutableData());
CHECK_NULL_RETURN(in1_addr_);
in2_addr_ = out_data;
ret = ParallelLaunch(this->ms_context_, AddNLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {

View File

@ -55,7 +55,7 @@ int ArithmeticCPUKernel::ReSize() {
CalcMultiplesAndStrides(param_);
if (param_->broadcasting_) {
outside_ = 1;
for (int i = static_cast<int>(param_->ndim_) - 1; i >= 0; --i) {
for (int i = static_cast<int>(param_->ndim_) - 1; i >= 0 && i < ARITHMETIC_SUPPORT_DIMS_NUM; --i) {
if (param_->in_shape0_[i] != param_->in_shape1_[i]) {
break_pos_ = i;
break;
@ -139,6 +139,7 @@ int ArithmeticCPUKernel::ConstTensorBroadCast() {
if (input0_ptr_ == nullptr) {
return RET_ERROR;
}
CHECK_NULL_RETURN(in_tensors_[0]->data());
TileConstTensor(in_tensors_[0]->data(), input0_ptr_, param_->ndim_, param_->in_shape0_, param_->in_strides0_,
param_->out_strides_, param_->multiples0_);
input0_broadcast_ = true;
@ -155,6 +156,7 @@ int ArithmeticCPUKernel::ConstTensorBroadCast() {
FreeConstTileBuff();
return RET_ERROR;
}
CHECK_NULL_RETURN(in_tensors_[1]->data());
TileConstTensor(in_tensors_[1]->data(), input1_ptr_, param_->ndim_, param_->in_shape1_, param_->in_strides1_,
param_->out_strides_, param_->multiples1_);
input1_broadcast_ = true;
@ -393,6 +395,7 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
if (count <= 0) {
return RET_OK;
}
CHECK_LESS_RETURN(ARITHMETIC_SUPPORT_DIMS_NUM, param_->ndim_);
int offset = stride * task_id * data_type_len_;
/* run opt function, one of input is scalar */
if (IsScalarClac()) { // 2 32 240 240, 1 1 1 1
@ -442,11 +445,14 @@ int ArithmeticCPUKernel::Run() {
}
if (!input0_broadcast_) {
input0_ptr_ = in_tensors_[0]->data();
CHECK_NULL_RETURN(input0_ptr_);
}
if (!input1_broadcast_) {
input1_ptr_ = in_tensors_[1]->data();
CHECK_NULL_RETURN(input1_ptr_);
}
output_ptr_ = out_tensors_[0]->data();
CHECK_NULL_RETURN(output_ptr_);
return ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_);
}

View File

@@ -26,7 +26,7 @@ using mindspore::schema::PrimitiveType_Unique;
namespace mindspore::kernel {
// Validates tensor counts before the kernel runs: Unique needs at least one
// input and two outputs (the unique values and their indices).
// Returns RET_OK on success; CHECK_LESS_RETURN bails out with an error otherwise.
int UniqueCPUKernel::Prepare() {
  CHECK_LESS_RETURN(in_tensors_.size(), 1);
  CHECK_LESS_RETURN(out_tensors_.size(), C2NUM);
  return RET_OK;
}
@@ -34,11 +34,11 @@ int UniqueCPUKernel::ReSize() { return RET_OK; }
int UniqueCPUKernel::Run() {
auto input = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
MS_ASSERT(input);
CHECK_NULL_RETURN(input);
auto output0 = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
MS_ASSERT(output0);
CHECK_NULL_RETURN(output0);
auto output1 = reinterpret_cast<int *>(out_tensors_.at(1)->MutableData());
MS_ASSERT(output1);
CHECK_NULL_RETURN(output1);
int output0_len = 0;
Unique(input, in_tensors_.at(0)->ElementsNum(), output0, &output0_len, output1);

View File

@ -52,6 +52,8 @@ int ArithmeticsInt8Launch(void *cdata, int task_id, float lhs_scale, float rhs_s
} // namespace
int ArithmeticInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
switch (op_parameter_->type_) {
case PrimitiveType_Equal:
arithmetic_run_ = ElementEqualInt8;
@ -105,8 +107,11 @@ int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; }
int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
auto input0_data = reinterpret_cast<int8_t *>(in_tensors_[0]->MutableData());
auto input1_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData());
CHECK_NULL_RETURN(input0_data);
auto input1_data = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData());
CHECK_NULL_RETURN(input1_data);
auto output_data = reinterpret_cast<uint8_t *>(out_tensors_[0]->MutableData());
CHECK_NULL_RETURN(output_data);
auto element_num = out_tensors_[0]->ElementsNum();
auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
int error_code;
@ -125,7 +130,7 @@ int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
return error_code;
}
} else if (arithmetic_run_ != nullptr) {
error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num, &quant_args_);
error_code = arithmetic_run_(input0_data, input1_data, output_data, element_num, &quant_args_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code;
return error_code;
@ -141,7 +146,9 @@ int ArithmeticInt8CPUKernel::Run() {
auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
if (param->broadcasting_) {
auto input_data0 = reinterpret_cast<int8_t *>(in_tensors_[0]->MutableData());
CHECK_NULL_RETURN(input_data0);
auto input_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData());
CHECK_NULL_RETURN(input_data1);
tile_data0_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_[0]->Size()));
tile_data1_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_[0]->Size()));
if (tile_data0_ == nullptr || tile_data1_ == nullptr) {

View File

@ -39,6 +39,8 @@ DepthToSpaceInt8CPUKernel::~DepthToSpaceInt8CPUKernel() {
}
int DepthToSpaceInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), 1);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
param_->data_type_size_ = sizeof(int8_t);
in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
@ -48,6 +50,7 @@ int DepthToSpaceInt8CPUKernel::Prepare() {
}
auto *input_tensor = in_tensors_.at(kInputIndex);
auto in_quant_args = input_tensor->quant_params();
CHECK_LESS_RETURN(in_quant_args.size(), 1);
in_quant_arg_->scale_ = in_quant_args.front().scale;
in_quant_arg_->zp_ = in_quant_args.front().zeroPoint;
@ -58,6 +61,7 @@ int DepthToSpaceInt8CPUKernel::Prepare() {
}
auto *out_tensor = out_tensors_.at(kOutputIndex);
auto out_quant_args = out_tensor->quant_params();
CHECK_LESS_RETURN(out_quant_args.size(), 1);
out_quant_arg_->scale_ = out_quant_args.front().scale;
out_quant_arg_->zp_ = out_quant_args.front().zeroPoint;
if (!InferShapeDone()) {
@ -70,7 +74,9 @@ int DepthToSpaceInt8CPUKernel::Run() {
auto input = in_tensors_[0];
auto output = out_tensors_[0];
const int8_t *input_data = reinterpret_cast<const int8_t *>(input->data());
CHECK_NULL_RETURN(input_data);
int8_t *output_data = reinterpret_cast<int8_t *>(output->data());
CHECK_NULL_RETURN(output_data);
auto in_shape = input->shape();
if (std::abs(in_quant_arg_->scale_ - out_quant_arg_->scale_) < FLT_EPSILON &&
in_quant_arg_->zp_ == out_quant_arg_->zp_) {

View File

@ -26,6 +26,7 @@ namespace mindspore::kernel {
int FullconnectionInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
CHECK_NULL_RETURN(param_);
param_->batch = 1;
param_->a_transpose_ = false;
param_->b_transpose_ = true;
@ -45,6 +46,7 @@ int FullconnectionInt8CPUKernel::Prepare() {
}
int FullconnectionInt8CPUKernel::ReSize() {
CHECK_NULL_RETURN(param_);
int row = 1;
for (size_t i = 0; i < out_tensors_.at(0)->shape().size() - 1; ++i) {
row *= (out_tensors_.at(0)->shape()).at(i);

View File

@ -31,9 +31,12 @@ using mindspore::schema::PrimitiveType_Gather;
namespace mindspore::kernel {
int GatherInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
auto in_quant_args = in_tensors_.at(0)->quant_params();
CHECK_LESS_RETURN(in_quant_args.size(), 1);
auto out_quant_args = out_tensors_.at(0)->quant_params();
CHECK_LESS_RETURN(out_quant_args.size(), 1);
param_.alpha_ = in_quant_args.front().scale / out_quant_args.front().scale;
param_.zp_in_ = in_quant_args.front().zeroPoint;
param_.zp_out_ = out_quant_args.front().zeroPoint;

View File

@ -36,10 +36,12 @@ int PowerInt8CPUKernel::Prepare() {
MSLITE_CHECK_PTR(output);
auto in_quant_args = input->quant_params();
CHECK_LESS_RETURN(in_quant_args.size(), 1);
param_->quant_arg_.in_args_.scale_ = in_quant_args.front().scale;
param_->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint;
auto out_quant_args = output->quant_params();
CHECK_LESS_RETURN(out_quant_args.size(), 1);
param_->quant_arg_.out_args_.scale_ = out_quant_args.front().scale;
param_->quant_arg_.out_args_.zp_ = out_quant_args.front().zeroPoint;
@ -69,6 +71,7 @@ int PowerInt8CPUKernel::DoPower(int task_id) {
if (in_tensors_.size() == 2) {
auto exp_tensor = in_tensors_.at(1);
auto exp_quant_args = exp_tensor->quant_params();
CHECK_LESS_RETURN(exp_quant_args.size(), 1);
param_->quant_arg_.exp_args_.scale_ = exp_quant_args.front().scale;
param_->quant_arg_.exp_args_.zp_ = exp_quant_args.front().zeroPoint;
exp_ptr = reinterpret_cast<int8_t *>(exp_tensor->MutableData());

View File

@ -27,6 +27,8 @@ namespace {
constexpr size_t kMaxShapeSize = 20;
} // namespace
int TransposeInt8CPUKernel::Prepare() {
CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
CHECK_LESS_RETURN(out_tensors_.size(), 1);
if (!InferShapeDone()) {
return RET_OK;
}
@ -54,7 +56,7 @@ int TransposeInt8CPUKernel::ReSize() {
// get perm data
auto perm_tensor = in_tensors_.at(1);
int *perm_data = reinterpret_cast<int *>(perm_tensor->data());
MS_ASSERT(perm_data != nullptr);
CHECK_NULL_RETURN(perm_data);
transpose_param_->num_axes_ = perm_tensor->ElementsNum();
for (int i = 0; i < transpose_param_->num_axes_; ++i) {
transpose_param_->perm_[i] = perm_data[i];
@@ -70,11 +72,11 @@ int TransposeInt8CPUKernel::ReSize() {
}
// Worker body for one transpose task. Validates every pointer it dereferences
// with CHECK_NULL_RETURN (which returns an error code) rather than the
// debug-only MS_ASSERT, then delegates the actual permutation to
// TransposeDimsInt8, sliced by task_id across op_parameter_->thread_num_ tasks.
int TransposeInt8CPUKernel::DoTranspose(int task_id) {
  CHECK_NULL_RETURN(in_ptr_);
  CHECK_NULL_RETURN(out_ptr_);
  CHECK_NULL_RETURN(in_shape_);
  CHECK_NULL_RETURN(out_shape_);
  CHECK_NULL_RETURN(transpose_param_);
  TransposeDimsInt8(in_ptr_, out_ptr_, out_shape_, transpose_param_, task_id, op_parameter_->thread_num_);
  return RET_OK;
}
@ -106,7 +108,9 @@ int TransposeInt8CPUKernel::Run() {
auto out_dims = out_tensor->shape();
in_ptr_ = reinterpret_cast<int8_t *>(in_tensor->data());
CHECK_NULL_RETURN(in_ptr_);
out_ptr_ = reinterpret_cast<int8_t *>(out_tensor->data());
CHECK_NULL_RETURN(out_ptr_);
GetNHNCTransposeFunc(in_tensor, out_tensor, transpose_param_);
if (NHNCTransposeFunc_ != nullptr) {
NHNCTransposeFunc_(in_ptr_, out_ptr_, nhnc_param_[0], nhnc_param_[1], nhnc_param_[2]);