forked from mindspore-Ecosystem/mindspore
!24466 [MS][LITE] optimize code
Merge pull request !24466 from chenjianping/code_check
Commit 1fd8f840fa
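The recurring pattern across these hunks: bare pointer dereferences and debug-only MS_ASSERT calls are replaced with check macros that return an error code at runtime, and magic numbers are replaced with named constants (ARITHMETIC_SUPPORT_DIMS_NUM, and C2NUM for the literal 2). The real macro definitions live in the nnacl/lite headers (e.g. nnacl/op_base.h); the block below is only a minimal sketch, assuming typical expansions, of the behavior the call sites rely on.

// Minimal sketch, assuming typical expansions -- not the exact MindSpore
// definitions (those live in headers such as nnacl/op_base.h).
#define RET_OK 0
#define RET_ERROR (-1)
#define RET_NULL_PTR (-6)  // hypothetical error code, for illustration only

// Fail fast with an error code (in every build type) when a pointer is null.
#define CHECK_NULL_RETURN(ptr)   \
  do {                           \
    if ((ptr) == nullptr) {      \
      return RET_NULL_PTR;       \
    }                            \
  } while (0)

// Return an error when size1 < size2: too few tensors, too few quant
// params, or (with the arguments swapped) too many dimensions.
#define CHECK_LESS_RETURN(size1, size2) \
  do {                                  \
    if ((size1) < (size2)) {            \
      return RET_ERROR;                 \
    }                                   \
  } while (0)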
@@ -20,26 +20,27 @@
 #include "nnacl/op_base.h"
 #include "nnacl/common_func.h"
 #include "nnacl/nnacl_utils.h"
+#define ARITHMETIC_SUPPORT_DIMS_NUM 10

 typedef struct ArithmeticParameter {
   OpParameter op_parameter_;
   bool broadcasting_;
   size_t ndim_;
   int activation_type_;
-  int in_shape0_[10];
+  int in_shape0_[ARITHMETIC_SUPPORT_DIMS_NUM];
   int in_elements_num0_;
-  int in_shape1_[10];
+  int in_shape1_[ARITHMETIC_SUPPORT_DIMS_NUM];
   int in_elements_num1_;

-  int out_shape_[10];
+  int out_shape_[ARITHMETIC_SUPPORT_DIMS_NUM];
   int out_elements_num_;

-  int in_strides0_[10];
-  int in_strides1_[10];
-  int out_strides_[10];
+  int in_strides0_[ARITHMETIC_SUPPORT_DIMS_NUM];
+  int in_strides1_[ARITHMETIC_SUPPORT_DIMS_NUM];
+  int out_strides_[ARITHMETIC_SUPPORT_DIMS_NUM];

-  int multiples0_[10];
-  int multiples1_[10];
+  int multiples0_[ARITHMETIC_SUPPORT_DIMS_NUM];
+  int multiples1_[ARITHMETIC_SUPPORT_DIMS_NUM];
   int eltwise_mode_;  // eltwise need
 } ArithmeticParameter;
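With the array bound named once in the header, every consumer of ArithmeticParameter can validate ndim_ against the same constant instead of a scattered literal 10, and the define is the single place to edit if the supported rank ever changes. A hypothetical illustration (helper name invented, not part of the patch):

// Hypothetical helper, for illustration only: one comparison against the
// named bound guards indexing into all of the fixed-size arrays at once.
#include <cstddef>

#define ARITHMETIC_SUPPORT_DIMS_NUM 10

static bool ArithmeticDimsSupported(size_t ndim) {
  return ndim <= ARITHMETIC_SUPPORT_DIMS_NUM;
}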
@@ -56,8 +56,11 @@ int AddNFp16CPUKernel::AddNParallelRun(int thread_id, float lhs_scale, float rhs
 int AddNFp16CPUKernel::Run() {
   elements_num_ = out_tensors_[0]->ElementsNum();
   auto input0_data = reinterpret_cast<float16_t *>(in_tensors_[0]->MutableData());
+  CHECK_NULL_RETURN(input0_data);
   auto input1_data = reinterpret_cast<float16_t *>(in_tensors_[1]->MutableData());
+  CHECK_NULL_RETURN(input1_data);
   auto out_data = reinterpret_cast<float16_t *>(out_tensors_[0]->MutableData());
+  CHECK_NULL_RETURN(out_data);
   if (static_cast<int>(elements_num_) < op_parameter_->thread_num_) {
     if (in_tensors_[0]->shape() == in_tensors_[1]->shape()) {
       ElementAddFp16(input0_data, input1_data, out_data, elements_num_);
@@ -71,6 +74,7 @@ int AddNFp16CPUKernel::Run() {
   }

   for (size_t i = 2; i < in_tensors_.size(); ++i) {
+    CHECK_NULL_RETURN(in_tensors_[i]->data());
     if (in_tensors_[i]->shape() == out_tensors_[0]->shape()) {
       ElementAddFp16(reinterpret_cast<float16_t *>(in_tensors_[i]->data()), out_data, out_data, elements_num_);
     } else {
@@ -95,6 +99,7 @@ int AddNFp16CPUKernel::Run() {
   }
   for (size_t i = 2; i < in_tensors_.size(); ++i) {
     in1_addr_ = reinterpret_cast<float16_t *>(in_tensors_[i]->MutableData());
+    CHECK_NULL_RETURN(in1_addr_);
     in2_addr_ = out_data;
     ret = ParallelLaunch(this->ms_context_, AddNLaunch, this, op_parameter_->thread_num_);
     if (ret != RET_OK) {
@@ -55,7 +55,7 @@ int ArithmeticCPUKernel::ReSize() {
   CalcMultiplesAndStrides(param_);
   if (param_->broadcasting_) {
     outside_ = 1;
-    for (int i = static_cast<int>(param_->ndim_) - 1; i >= 0; --i) {
+    for (int i = static_cast<int>(param_->ndim_) - 1; i >= 0 && i < ARITHMETIC_SUPPORT_DIMS_NUM; --i) {
       if (param_->in_shape0_[i] != param_->in_shape1_[i]) {
         break_pos_ = i;
         break;
@@ -139,6 +139,7 @@ int ArithmeticCPUKernel::ConstTensorBroadCast() {
     if (input0_ptr_ == nullptr) {
       return RET_ERROR;
     }
+    CHECK_NULL_RETURN(in_tensors_[0]->data());
     TileConstTensor(in_tensors_[0]->data(), input0_ptr_, param_->ndim_, param_->in_shape0_, param_->in_strides0_,
                     param_->out_strides_, param_->multiples0_);
     input0_broadcast_ = true;
@@ -155,6 +156,7 @@ int ArithmeticCPUKernel::ConstTensorBroadCast() {
       FreeConstTileBuff();
       return RET_ERROR;
     }
+    CHECK_NULL_RETURN(in_tensors_[1]->data());
     TileConstTensor(in_tensors_[1]->data(), input1_ptr_, param_->ndim_, param_->in_shape1_, param_->in_strides1_,
                     param_->out_strides_, param_->multiples1_);
     input1_broadcast_ = true;
@@ -393,6 +395,7 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
   if (count <= 0) {
     return RET_OK;
   }
+  CHECK_LESS_RETURN(ARITHMETIC_SUPPORT_DIMS_NUM, param_->ndim_);
   int offset = stride * task_id * data_type_len_;
   /* run opt function, one of input is scalar */
   if (IsScalarClac()) {  // 2 32 240 240, 1 1 1 1
@@ -442,11 +445,14 @@ int ArithmeticCPUKernel::Run() {
   }
   if (!input0_broadcast_) {
     input0_ptr_ = in_tensors_[0]->data();
+    CHECK_NULL_RETURN(input0_ptr_);
   }
   if (!input1_broadcast_) {
     input1_ptr_ = in_tensors_[1]->data();
+    CHECK_NULL_RETURN(input1_ptr_);
   }
   output_ptr_ = out_tensors_[0]->data();
+  CHECK_NULL_RETURN(output_ptr_);
   return ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_);
 }
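Both the new loop condition in ReSize and the CHECK_LESS_RETURN in DoArithmetic bound ndim_ by ARITHMETIC_SUPPORT_DIMS_NUM before the fixed-size shape arrays are indexed. A standalone sketch of the failure mode being ruled out (values invented for illustration):

// Sketch only: shows why the added bound matters. With ndim = 12 the old
// loop would read in_shape0[11], past the end of a 10-element array (UB);
// the new condition fails up front, so the body never executes at all.
#include <cstdio>

#define ARITHMETIC_SUPPORT_DIMS_NUM 10

int main() {
  int in_shape0[ARITHMETIC_SUPPORT_DIMS_NUM] = {0};
  size_t ndim = 12;  // pretend an unvalidated model supplied this
  for (int i = static_cast<int>(ndim) - 1; i >= 0 && i < ARITHMETIC_SUPPORT_DIMS_NUM; --i) {
    printf("in_shape0[%d] = %d\n", i, in_shape0[i]);
  }
  return 0;
}

The DoArithmetic guard takes the other route: CHECK_LESS_RETURN(ARITHMETIC_SUPPORT_DIMS_NUM, param_->ndim_) rejects the whole invocation with an error code instead of silently skipping work.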
@@ -26,7 +26,7 @@ using mindspore::schema::PrimitiveType_Unique;
 namespace mindspore::kernel {
 int UniqueCPUKernel::Prepare() {
   CHECK_LESS_RETURN(in_tensors_.size(), 1);
-  CHECK_LESS_RETURN(out_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), C2NUM);
   return RET_OK;
 }

@@ -34,11 +34,11 @@ int UniqueCPUKernel::ReSize() { return RET_OK; }

 int UniqueCPUKernel::Run() {
   auto input = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
-  MS_ASSERT(input);
+  CHECK_NULL_RETURN(input);
   auto output0 = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-  MS_ASSERT(output0);
+  CHECK_NULL_RETURN(output0);
   auto output1 = reinterpret_cast<int *>(out_tensors_.at(1)->MutableData());
-  MS_ASSERT(output1);
+  CHECK_NULL_RETURN(output1);

   int output0_len = 0;
   Unique(input, in_tensors_.at(0)->ElementsNum(), output0, &output0_len, output1);
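The MS_ASSERT-to-CHECK_NULL_RETURN swaps in this file (and in transpose_int8 below) change behavior, not just style: an assert is typically compiled out of release builds, so a null MutableData() would flow straight into Unique() and crash. A minimal sketch of the difference, assuming MS_ASSERT follows the usual assert/NDEBUG convention:

// Sketch, assuming MS_ASSERT follows the usual assert/NDEBUG convention.
#include <cassert>

#ifdef NDEBUG
#define MS_ASSERT(x) ((void)0)  // release build: the check disappears
#else
#define MS_ASSERT(x) assert(x)
#endif

#define RET_NULL_PTR (-6)  // hypothetical error code, for illustration
#define CHECK_NULL_RETURN(ptr)                   \
  do {                                           \
    if ((ptr) == nullptr) return RET_NULL_PTR;   \
  } while (0)

int RunSketch(const float *input) {
  MS_ASSERT(input);          // release: no-op, a null pointer sails through
  CHECK_NULL_RETURN(input);  // every build: error out before the first use
  return static_cast<int>(input[0]);
}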
@@ -52,6 +52,8 @@ int ArithmeticsInt8Launch(void *cdata, int task_id, float lhs_scale, float rhs_s
 }  // namespace

 int ArithmeticInt8CPUKernel::Prepare() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   switch (op_parameter_->type_) {
     case PrimitiveType_Equal:
       arithmetic_run_ = ElementEqualInt8;
@@ -105,8 +107,11 @@ int ArithmeticInt8CPUKernel::ReSize() { return RET_OK; }

 int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
   auto input0_data = reinterpret_cast<int8_t *>(in_tensors_[0]->MutableData());
-  auto input1_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData());
+  CHECK_NULL_RETURN(input0_data);
+  auto input1_data = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData());
+  CHECK_NULL_RETURN(input1_data);
   auto output_data = reinterpret_cast<uint8_t *>(out_tensors_[0]->MutableData());
+  CHECK_NULL_RETURN(output_data);
   auto element_num = out_tensors_[0]->ElementsNum();
   auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
   int error_code;
@@ -125,7 +130,7 @@ int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
       return error_code;
     }
   } else if (arithmetic_run_ != nullptr) {
-    error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num, &quant_args_);
+    error_code = arithmetic_run_(input0_data, input1_data, output_data, element_num, &quant_args_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code;
       return error_code;
@@ -141,7 +146,9 @@ int ArithmeticInt8CPUKernel::Run() {
   auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
   if (param->broadcasting_) {
     auto input_data0 = reinterpret_cast<int8_t *>(in_tensors_[0]->MutableData());
+    CHECK_NULL_RETURN(input_data0);
     auto input_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->MutableData());
+    CHECK_NULL_RETURN(input_data1);
     tile_data0_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_[0]->Size()));
     tile_data1_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(out_tensors_[0]->Size()));
     if (tile_data0_ == nullptr || tile_data1_ == nullptr) {
@@ -39,6 +39,8 @@ DepthToSpaceInt8CPUKernel::~DepthToSpaceInt8CPUKernel() {
 }

 int DepthToSpaceInt8CPUKernel::Prepare() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   param_->data_type_size_ = sizeof(int8_t);

   in_quant_arg_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
@@ -48,6 +50,7 @@ int DepthToSpaceInt8CPUKernel::Prepare() {
   }
   auto *input_tensor = in_tensors_.at(kInputIndex);
   auto in_quant_args = input_tensor->quant_params();
+  CHECK_LESS_RETURN(in_quant_args.size(), 1);
   in_quant_arg_->scale_ = in_quant_args.front().scale;
   in_quant_arg_->zp_ = in_quant_args.front().zeroPoint;

@@ -58,6 +61,7 @@ int DepthToSpaceInt8CPUKernel::Prepare() {
   }
   auto *out_tensor = out_tensors_.at(kOutputIndex);
   auto out_quant_args = out_tensor->quant_params();
+  CHECK_LESS_RETURN(out_quant_args.size(), 1);
   out_quant_arg_->scale_ = out_quant_args.front().scale;
   out_quant_arg_->zp_ = out_quant_args.front().zeroPoint;
   if (!InferShapeDone()) {
@@ -70,7 +74,9 @@ int DepthToSpaceInt8CPUKernel::Run() {
   auto input = in_tensors_[0];
   auto output = out_tensors_[0];
   const int8_t *input_data = reinterpret_cast<const int8_t *>(input->data());
+  CHECK_NULL_RETURN(input_data);
   int8_t *output_data = reinterpret_cast<int8_t *>(output->data());
+  CHECK_NULL_RETURN(output_data);
   auto in_shape = input->shape();
   if (std::abs(in_quant_arg_->scale_ - out_quant_arg_->scale_) < FLT_EPSILON &&
       in_quant_arg_->zp_ == out_quant_arg_->zp_) {
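A tensor can legitimately carry no quantization parameters, and std::vector::front() on an empty vector is undefined behavior, so each quant_params() read above (and in gather_int8 and power_int8 below) now gets a size check before .front(). A sketch with invented types:

// Sketch only: QuantArgSketch is an invented stand-in type, and the error
// codes are illustrative.
#include <vector>

#define RET_OK 0
#define RET_ERROR (-1)
#define CHECK_LESS_RETURN(size1, size2)        \
  do {                                         \
    if ((size1) < (size2)) return RET_ERROR;   \
  } while (0)

struct QuantArgSketch {
  double scale;
  int zeroPoint;
};

int ReadQuantParams(const std::vector<QuantArgSketch> &quant_args, double *scale, int *zp) {
  CHECK_LESS_RETURN(quant_args.size(), 1);  // empty vector -> error, not UB
  *scale = quant_args.front().scale;        // safe now
  *zp = quant_args.front().zeroPoint;
  return RET_OK;
}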
@@ -26,6 +26,7 @@ namespace mindspore::kernel {
 int FullconnectionInt8CPUKernel::Prepare() {
   CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
   CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  CHECK_NULL_RETURN(param_);
   param_->batch = 1;
   param_->a_transpose_ = false;
   param_->b_transpose_ = true;
@@ -45,6 +46,7 @@ int FullconnectionInt8CPUKernel::Prepare() {
 }

 int FullconnectionInt8CPUKernel::ReSize() {
+  CHECK_NULL_RETURN(param_);
   int row = 1;
   for (size_t i = 0; i < out_tensors_.at(0)->shape().size() - 1; ++i) {
     row *= (out_tensors_.at(0)->shape()).at(i);
@@ -31,9 +31,12 @@ using mindspore::schema::PrimitiveType_Gather;
 namespace mindspore::kernel {
 int GatherInt8CPUKernel::Prepare() {
   CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
   auto in_quant_args = in_tensors_.at(0)->quant_params();
+  CHECK_LESS_RETURN(in_quant_args.size(), 1);
   auto out_quant_args = out_tensors_.at(0)->quant_params();
+  CHECK_LESS_RETURN(out_quant_args.size(), 1);
   param_.alpha_ = in_quant_args.front().scale / out_quant_args.front().scale;
   param_.zp_in_ = in_quant_args.front().zeroPoint;
   param_.zp_out_ = out_quant_args.front().zeroPoint;
@@ -36,10 +36,12 @@ int PowerInt8CPUKernel::Prepare() {
   MSLITE_CHECK_PTR(output);

   auto in_quant_args = input->quant_params();
+  CHECK_LESS_RETURN(in_quant_args.size(), 1);
   param_->quant_arg_.in_args_.scale_ = in_quant_args.front().scale;
   param_->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint;

   auto out_quant_args = output->quant_params();
+  CHECK_LESS_RETURN(out_quant_args.size(), 1);
   param_->quant_arg_.out_args_.scale_ = out_quant_args.front().scale;
   param_->quant_arg_.out_args_.zp_ = out_quant_args.front().zeroPoint;

@@ -69,6 +71,7 @@ int PowerInt8CPUKernel::DoPower(int task_id) {
   if (in_tensors_.size() == 2) {
     auto exp_tensor = in_tensors_.at(1);
     auto exp_quant_args = exp_tensor->quant_params();
+    CHECK_LESS_RETURN(exp_quant_args.size(), 1);
     param_->quant_arg_.exp_args_.scale_ = exp_quant_args.front().scale;
     param_->quant_arg_.exp_args_.zp_ = exp_quant_args.front().zeroPoint;
     exp_ptr = reinterpret_cast<int8_t *>(exp_tensor->MutableData());
@@ -27,6 +27,8 @@ namespace {
 constexpr size_t kMaxShapeSize = 20;
 }  // namespace
 int TransposeInt8CPUKernel::Prepare() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -54,7 +56,7 @@ int TransposeInt8CPUKernel::ReSize() {
   // get perm data
   auto perm_tensor = in_tensors_.at(1);
   int *perm_data = reinterpret_cast<int *>(perm_tensor->data());
-  MS_ASSERT(perm_data != nullptr);
+  CHECK_NULL_RETURN(perm_data);
   transpose_param_->num_axes_ = perm_tensor->ElementsNum();
   for (int i = 0; i < transpose_param_->num_axes_; ++i) {
     transpose_param_->perm_[i] = perm_data[i];
@@ -70,11 +72,11 @@ int TransposeInt8CPUKernel::ReSize() {
 }

 int TransposeInt8CPUKernel::DoTranspose(int task_id) {
-  MS_ASSERT(in_ptr_);
-  MS_ASSERT(out_ptr_);
-  MS_ASSERT(in_shape_);
-  MS_ASSERT(out_shape_);
-  MS_ASSERT(transpose_param_);
+  CHECK_NULL_RETURN(in_ptr_);
+  CHECK_NULL_RETURN(out_ptr_);
+  CHECK_NULL_RETURN(in_shape_);
+  CHECK_NULL_RETURN(out_shape_);
+  CHECK_NULL_RETURN(transpose_param_);
   TransposeDimsInt8(in_ptr_, out_ptr_, out_shape_, transpose_param_, task_id, op_parameter_->thread_num_);
   return RET_OK;
 }
@@ -106,7 +108,9 @@ int TransposeInt8CPUKernel::Run() {
   auto out_dims = out_tensor->shape();

   in_ptr_ = reinterpret_cast<int8_t *>(in_tensor->data());
+  CHECK_NULL_RETURN(in_ptr_);
   out_ptr_ = reinterpret_cast<int8_t *>(out_tensor->data());
+  CHECK_NULL_RETURN(out_ptr_);
   GetNHNCTransposeFunc(in_tensor, out_tensor, transpose_param_);
   if (NHNCTransposeFunc_ != nullptr) {
     NHNCTransposeFunc_(in_ptr_, out_ptr_, nhnc_param_[0], nhnc_param_[1], nhnc_param_[2]);
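Several hunks end by handing the kernel to ParallelLaunch(this->ms_context_, Fn, this, op_parameter_->thread_num_), which is why the null checks sit before the launch: the per-task callback (signature visible in the hunk headers above, int Fn(void *cdata, int task_id, float lhs_scale, float rhs_scale)) runs concurrently and has no clean point to bail out. A rough emulation of the fan-out with std::thread, purely to show the shape of that contract; the real ParallelLaunch is MindSpore Lite internal and pools its threads:

// Rough emulation only -- not the real scheduler. Shows the task/callback
// contract: each task gets its id, and the first failing task's error wins.
#include <thread>
#include <vector>

using KernelCallback = int (*)(void *cdata, int task_id, float lhs_scale, float rhs_scale);

int ParallelLaunchSketch(KernelCallback fn, void *cdata, int thread_num) {
  std::vector<std::thread> workers;
  std::vector<int> rets(static_cast<size_t>(thread_num), 0);
  for (int t = 0; t < thread_num; ++t) {
    workers.emplace_back([fn, cdata, t, &rets] {
      rets[static_cast<size_t>(t)] = fn(cdata, t, 0.0f, 0.0f);
    });
  }
  for (std::thread &w : workers) w.join();
  for (int r : rets) {
    if (r != 0) return r;  // surface the first failing task's error code
  }
  return 0;
}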