!4387 fix coding specification according to cppcheck report

Merge pull request !4387 from 徐安越/master
mindspore-ci-bot 2020-08-14 16:17:06 +08:00 committed by Gitee
commit 05b03fe017
42 changed files with 580 additions and 520 deletions
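Most of the hunks below are small cleanups flagged by cppcheck rather than behavioural rewrites; a few (called out further down) also fix real bugs. The recurring patterns are explicit nullptr comparisons, const-qualifying fixed-value locals, initializing variables that are only assigned in some branches, and guarding divisions by a possibly-zero value. A minimal standalone sketch of these shapes, using hypothetical names rather than code from the diff:

// Sketch of the recurring cppcheck-fix patterns in this PR (hypothetical names).
int ExampleKernel(const float *input, int pack_size, int oc_block_num) {
  // 1. Compare pointers against nullptr explicitly instead of using them as booleans.
  if (input == nullptr) {
    return -1;
  }
  // 2. Guard divisors that may be zero before dividing.
  if (oc_block_num == 0) {
    return -1;
  }
  // 3. const-qualify locals whose value never changes after initialization.
  const int block_size = pack_size / oc_block_num;
  // 4. Initialize variables that are only assigned inside some branches.
  const float *block_start = nullptr;
  bool use_first_block = false;
  if (block_size > 0) {
    block_start = input;
    use_first_block = true;
  }
  return (use_first_block && block_start != nullptr) ? block_size : 0;
}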

View File

@ -31,7 +31,7 @@ int Power::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::
}
auto output_tensor = outputs[0];
MS_ASSERT(output_tensor != nullptr);
- if (exp_tensor) {
+ if (exp_tensor != nullptr) {
if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) {
MS_LOG(ERROR) << "Power inputs shape or type is not equal!";
return RET_INPUT_TENSOR_ERROR;

View File

@ -48,7 +48,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
case kNumberTypeFloat32: {
kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive);
- if (!kernel) {
+ if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
}

View File

@ -97,8 +97,8 @@ int Convolution3x3FP16CPUKernel::InitWeightBias() {
}
int Convolution3x3FP16CPUKernel::InitTmpBuffer() {
- int tile_num = 16;
- int k_plane = 36;
+ const int tile_num = 16;
+ const int k_plane = 36;
int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
int oC8 = UP_DIV(conv_param_->output_channel_, C8NUM);

View File

@ -261,7 +261,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten
conv_param->input_w_ = inputs.front()->Width();
conv_param->output_h_ = outputs.front()->Height();
conv_param->output_w_ = outputs.front()->Width();
- bool use_winograd;
+ bool use_winograd = false;
int out_unit;
InputTransformUnitFunc input_trans_func = nullptr;
OutputTransformUnitFunc output_trans_func = nullptr;

View File

@ -61,7 +61,7 @@ int Convolution3x3CPUKernel::InitWeightBias() {
oc_block = C8NUM;
oc_block_num = UP_DIV(output_channel, C8NUM);
#endif
- int k_plane = 16;
+ const int k_plane = 16;
// init weight
size_t transformed_size = iC4 * C4NUM * oc_block_num * oc_block * k_plane * sizeof(float);
transformed_filter_addr_ = reinterpret_cast<float *>(malloc(transformed_size));
@ -93,7 +93,7 @@ int Convolution3x3CPUKernel::InitWeightBias() {
int Convolution3x3CPUKernel::InitTmpBuffer() {
int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
int oC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
- int k_plane = 16;
+ const int k_plane = 16;
/*=============================tile_buffer_============================*/
size_t tile_buffer_size = thread_count_ * TILE_NUM * k_plane * iC4 * C4NUM * sizeof(float);

View File

@ -52,6 +52,10 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int
std::vector<int> strides = trans_weight->GetStride();
int kernel_plane_stride = channel_in;
+ if (oc_block == 0) {
+ MS_LOG(ERROR) << "Divide by zero";
+ return;
+ }
for (int i = 0; i < channel_out; i++) {
int out_c_block = i / oc_block;
int out_c_res = i % oc_block;

View File

@ -84,7 +84,7 @@ int LstmCPUKernel::InitWeightBias() {
}
auto bias_data = reinterpret_cast<float *>(in_tensors_.at(3)->Data());
- int state_bias_offset = 4 * lstm_parm_->hidden_size_;
+ const int state_bias_offset = 4 * lstm_parm_->hidden_size_;
for (int i = 0; i < state_bias_offset; i++) {
bias_ptr_[i] = bias_data[i] + bias_data[i + state_bias_offset];
}

View File

@ -66,7 +66,7 @@ int PowerCPUKernel::RunImpl(int task_id) {
exp_addr = reinterpret_cast<float *>(in_tensors_[1]->Data());
broadcast = false;
}
- float *cur_exp;
+ float *cur_exp = nullptr;
if (broadcast) {
cur_exp = &power_;
} else {

View File

@ -161,7 +161,7 @@ int SqueezeInt8CPUKernel::Run() {
if (ret != RET_OK) {
MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: ";
}
- return RET_OK;
+ return ret;
}
int SqueezeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
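Beyond style, the hunk above changes behaviour: the kernel previously logged the failure from RunSqueezeParam but still returned RET_OK, so callers never saw the error; returning ret propagates the status. A minimal sketch of the pattern, with a hypothetical RunSqueezeWorker standing in for the real parallel launch:

#include <iostream>

constexpr int RET_OK = 0;
constexpr int RET_ERROR = 1;

// Hypothetical stand-in for the parallel-launched squeeze worker.
int RunSqueezeWorker() { return RET_ERROR; }

int Run() {
  int ret = RunSqueezeWorker();
  if (ret != RET_OK) {
    std::cerr << "RunSqueezeParam failed. errorcode: " << ret << std::endl;
  }
  return ret;  // propagate the real status instead of unconditionally returning RET_OK
}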

View File

@ -35,7 +35,7 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weight, float16_t *bias, size_t step,
size_t ic4, size_t out_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
size_t relu6) {
- int tile_n = 16;
+ const int tile_n = 16;
for (int i = 0; i < out_channel; i++) {
int oc8_block = i / 8;
int oc8_res = i % 8;
@ -76,7 +76,7 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
void IndirectGemmFp16_16x8_tmp(float16_t *output, float16_t *input, float16_t *weight, const float16_t *bias,
size_t step, size_t ic4, size_t output_channel, size_t offset, size_t mode,
size_t writeC4, size_t relu, size_t relu6) {
- int tile_num = 16;
+ const int tile_num = 16;
if (mode) {
for (int i = 0; i < tile_num; i++) {
int input_tile_offset = i * C4NUM;
@ -175,8 +175,8 @@ void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16
// todo
int thread_count = conv_param->thread_num_;
int tile_num = 16;
- int output_unit = 4;
- int k_plane = 36;
+ const int output_unit = 4;
+ const int k_plane = 36;
int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
int oc8 = UP_DIV(conv_param->output_channel_, C8NUM);

View File

@ -190,14 +190,16 @@ void Conv3x3Fp16InputUnit(float16_t *tmp_data, float16_t *trans_input_data, size
void Conv3x3Fp16InputTransform(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data,
int start_index, int real_cal_num, int out_w_block, ConvParameter *conv_param) {
// input data format : nhwc
- int output_unit = 4;
+ const int output_unit = 4;
int input_channel = conv_param->input_channel_;
int input_width = conv_param->input_w_;
int input_height = conv_param->input_h_;
int pad_w = conv_param->pad_w_;
int pad_h = conv_param->pad_h_;
int ic4 = UP_DIV(input_channel, C4NUM);
+ if (out_w_block == 0) {
+ return;
+ }
for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
int x_id = start_index + cal_id;
int origin_x = (x_id % out_w_block) * output_unit - pad_w;
@ -511,7 +513,9 @@ void Conv3x3Fp16OutputTransform(const float16_t *gemm_out, float16_t *out_data,
int output_h = conv_param->output_h_;
int out_h_block = UP_DIV(output_h, C4NUM);
int oc8 = UP_DIV(output_channel, C8NUM);
+ if (out_w_block == 0) {
+ return;
+ }
for (int i = 0; i < real_cal_num; i++) {
int out_w_index = (start_index + i) % out_w_block;
int out_h_index = (start_index + i) / out_w_block;

View File

@ -65,6 +65,9 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr
void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_ptr, size_t output_channel,
size_t plane_size, size_t stride, bool is_relu, bool is_relu6, int size) {
+ if (size == 0) {
+ return;
+ }
for (int oc = 0; oc < output_channel; oc++) {
int oc_div = oc / size, oc_mod = oc % size;
for (int hw = 0; hw < plane_size; hw++) {

View File

@ -142,7 +142,7 @@ void ConvSWFp32(const float *input_data, const float *packed_weight, const float
int ic4 = slidingWindow_param->ic4_channel_ / C4NUM;
int oc4_res = conv_param->output_channel_ % C4NUM;
const float *src = input_data;
- float *dst;
+ float *dst = NULL;
if (oc4_res == 0) {
dst = output_data;
} else {

View File

@ -328,36 +328,36 @@ void ConvDw3x3Fp32FilterTrans(float *trans_weight, float *weight, int oc4) {
float dst01 = (local_ptr + 4)[0];
float dst02 = (local_ptr + 8)[0];
- float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
- float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
- float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+ const float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+ const float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+ const float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
- float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
- float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
- float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+ const float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+ const float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+ const float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
float dst30 = (local_ptr + 24)[0];
float dst31 = (local_ptr + 28)[0];
float dst32 = (local_ptr + 32)[0];
float m00 = dst00;
- float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
- float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
+ const float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
+ const float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
float m03 = dst02;
float m10 = dst10;
- float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
- float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
+ const float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
+ const float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
float m13 = dst12;
float m20 = dst20;
- float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
- float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
+ const float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
+ const float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
float m23 = dst22;
float m30 = dst30;
- float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
- float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
+ const float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
+ const float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
float m33 = dst32;
*(dst + j) = m00;
@ -387,7 +387,7 @@ void ConvDw3x3Fp32FilterTrans(float *trans_weight, float *weight, int oc4) {
void ConvDw3x3Fp32InputTrans(const float *input_data, float *trans_input, float *block_buffer, int out_h_block,
int out_w_block, const ConvParameter *conv_param) {
int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
- int input_unit = 4;
+ const int input_unit = 4;
memset(trans_input, 0, out_h_block * out_h_block * 16 * C4NUM * sizeof(float));
for (int oh = 0; oh < out_h_block; oh++) {
@ -426,7 +426,7 @@ void ConvDw3x3Fp32InputTrans(const float *input_data, float *trans_input, float
// todo yangruoqi: implement assembly
void ConvDw3x3Fp32Winograd(float *trans_buffer, const float *weight, int out_h_block, int out_w_block) {
- int unit = 4;
+ const int unit = 4;
for (int oh = 0; oh < out_h_block; oh++) {
float *buf_oh = trans_buffer + oh * out_w_block * 16 * C4NUM;
for (int ow = 0; ow < out_w_block; ow++) {
@ -583,7 +583,7 @@ void ConvDw3x3Fp32OutputTrans(float *trans_buffer, float *output_data, const flo
int oc4 = UP_DIV(conv_param->output_channel_, C4NUM);
bool h_in_range = true;
for (int oh = 0; oh < out_h_block; oh++) {
- int real_oh = 2 * oh;
+ const int real_oh = 2 * oh;
if ((oh + 1) * 2 > conv_param->output_h_) {
h_in_range = false;
}
@ -592,7 +592,7 @@ void ConvDw3x3Fp32OutputTrans(float *trans_buffer, float *output_data, const flo
float *output_oh = output_data + real_oh * conv_param->output_w_ * oc4 * C4NUM;
for (int ow = 0; ow < out_w_block; ow++) {
- int real_ow = 2 * ow;
+ const int real_ow = 2 * ow;
if ((ow + 1) * 2 > conv_param->output_w_) {
w_in_range = false;
}

View File

@ -47,13 +47,13 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input
int y_bottom = (int)(floor(actual_y));
int y_top = y_bottom + 1 < in_h ? (y_bottom + 1) : (in_h - 1);
float y_top_weight = actual_y - (float)(y_bottom);
- float y_bottom_weight = 1.0f - y_top_weight;
+ const float y_bottom_weight = 1.0f - y_top_weight;
for (w = 0; w < new_width; w++) {
float actual_x = (float)(w)*width_scale;
int x_left = (int)(floor(actual_x));
int x_right = x_left + 1 < in_w ? (x_left + 1) : (in_w - 1);
float x_right_weight = actual_x - (float)(x_left);
- float x_left_weight = 1.0f - x_right_weight;
+ const float x_left_weight = 1.0f - x_right_weight;
c = 0;
#ifdef ENABLE_NEON
for (; c <= in_c - 4; c += 4) {

View File

@ -30,7 +30,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, const int *in_shape, c
int pooled_width = param->pooledW_;
int in_stride[DIMENSION_4D];
int out_stride[DIMENSION_4D];
- int roi_stride = 5;
+ const int roi_stride = 5;
in_stride[DIMENSION_4D - 1] = 1;
out_stride[DIMENSION_4D - 1] = 1;
for (int i = dim - 2; i >= 0; --i) {

View File

@ -138,7 +138,7 @@ void DoPadding(const float *input, float *padded_input, SpaceToBatchParameter pa
}
int SpaceToBatch(const float *input, float *output, SpaceToBatchParameter param, float *tmp_space[3]) {
- float *padded_input;
+ float *padded_input = NULL;
int ret;
if (param.need_paddings_) {
if (tmp_space[0] == NULL || tmp_space[1] == NULL || tmp_space[2] == NULL) {

View File

@ -30,7 +30,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
int output_h = pooling_param->output_h_;
int output_batch = pooling_param->output_batch_;
- const float *inPtr;
+ const float *inPtr = NULL;
for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;
// int pad_top = padding[2];
@ -119,7 +119,7 @@ void MaxPoolingGrad(const float *dy, const int *indices, float *output_ptr, Pool
const float *yt = (const float *)(dy);
const int *pos = (const int *)(indices);
- float *out;
+ float *out = NULL;
if (1) { // grads->layout() == Tensor::nhwc)
for (int ib = 0; ib < output_batch; ib++) {

View File

@ -34,7 +34,7 @@ void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_co
void DoArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count,
int axis_count, int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
bool out_value = param->out_value_;
- float output_inverse_scale = 1.f / out_quant_arg->scale_;
+ const float output_inverse_scale = 1.f / out_quant_arg->scale_;
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_;
for (int i = 0; i < pre_axis_count; ++i) {

View File

@ -28,7 +28,7 @@ void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const in
size_t output_offset = 0;
size_t in_stride_h = in_w * in_c;
size_t in_stride_n = in_stride_h * in_h;
- float output_inverse_scale = 1.f / out_quant_arg->scale_;
+ const float output_inverse_scale = 1.f / out_quant_arg->scale_;
float scale = in_quant_arg->scale_ * output_inverse_scale;
float bias = -in_quant_arg->zp_ * scale;
int32_t output_zp = out_quant_arg->zp_;
@ -76,7 +76,7 @@ void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_
size_t in_stride_h = in_w * in_c;
size_t in_stride_n = in_stride_h * in_h;
- float output_inverse_scale = 1.f / out_quant_arg->scale_;
+ const float output_inverse_scale = 1.f / out_quant_arg->scale_;
float scale = in_quant_arg->scale_ * output_inverse_scale;
float bias = -in_quant_arg->zp_ * scale;
int32_t output_zp = out_quant_arg->zp_;

View File

@ -20,7 +20,7 @@
void Int8Concat(int8_t **inputs, int8_t *output, ConcatParameter *para, int axis, int64_t real_dst_count, int task_id) {
float output_scale = para->quant_arg_.out_args_.scale_;
- float output_inverse_scale = 1.f / output_scale;
+ const float output_inverse_scale = 1.f / output_scale;
int input_num = para->input_num_;
int count_unit_ = para->count_unit_;
int after_axis_size = para->after_axis_size;

View File

@ -201,7 +201,7 @@ void Conv3x3Uint8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, i
#ifdef ENABLE_ARM
IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t));
#else
- int input_unit_square = 16;
+ const int input_unit_square = 16;
for (int c = 0; c < oc4; c++) {
int filter_oc_offset = c * input_unit_square * ic8 * C8NUM * C4NUM;
int dst_oc_offset = c * input_unit_square * C4NUM;

View File

@ -22,7 +22,7 @@ void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape,
int32_t in_shape_dim2 = in_shape[2];
int32_t in_shape_dim1 = in_shape[1];
size_t copy_size = block_size * param->out_stride_dim2_;
- float output_inverse_scale = 1.f / out_quant_arg->scale_;
+ const float output_inverse_scale = 1.f / out_quant_arg->scale_;
float scale = in_quant_arg->scale_ * output_inverse_scale;
float bias = -in_quant_arg->zp_ * scale;
int32_t output_zp = out_quant_arg->zp_;

View File

@ -36,8 +36,8 @@ void AvgPoolingInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParamete
float output_scale = pooling_param->quant_args_[1][0].scale_;
int output_zp = pooling_param->quant_args_[1][0].zp_;
double real_multiplier = input_scale / output_scale;
- int8_t out_min = INT8_MIN;
- int8_t out_max = INT8_MAX;
+ const int8_t out_min = INT8_MIN;
+ const int8_t out_max = INT8_MAX;
for (int batch = 0; batch < output_batch; batch++) {
int in_batch_offset = batch * in_h * in_w * channel;
@ -91,8 +91,8 @@ void AvgPoolingOptInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParam
int out_tile_count = UP_DIV(out_plane, TILE_NUM);
int thread_num = pooling_param->thread_num_;
int c8 = UP_DIV(channel, C8NUM);
- int8_t out_min = INT8_MIN;
- int8_t out_max = INT8_MAX;
+ const int8_t out_min = INT8_MIN;
+ const int8_t out_max = INT8_MAX;
for (int batch = 0; batch < output_batch; batch++) {
int in_batch_offset = batch * in_h * in_w * channel;

View File

@ -20,7 +20,7 @@
void prelu(int8_t *inputs, int8_t *output_ptr, PreluParameter *quant_prelu_parm, int task_id) {
float output_scale = quant_prelu_parm->quant_arg.out_args_.scale_;
int output_zp = quant_prelu_parm->quant_arg.out_args_.zp_;
- float output_inverse_scale = 1.f / output_scale;
+ const float output_inverse_scale = 1.f / output_scale;
int output_dim = quant_prelu_parm->input_dim_;
QuantArg *input_quant = NULL;

View File

@ -22,7 +22,7 @@ void Int8Reshape(int8_t *input_ptr, int8_t *output_ptr, int64_t real_dst_count,
if (para.in_args_.scale_ == para.out_args_.scale_ && para.in_args_.zp_ == para.out_args_.zp_) {
memcpy(output_ptr, input_ptr, real_dst_count);
} else {
- float output_inverse_scale = 1.f / para.out_args_.scale_;
+ const float output_inverse_scale = 1.f / para.out_args_.scale_;
float scale = para.in_args_.scale_ * output_inverse_scale;
float bias = -para.in_args_.zp_ * scale;
int32_t output_zp = para.out_args_.zp_;

View File

@ -115,6 +115,9 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
}
void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
+ if (out_value == 0) {
+ return;
+ }
*scale = (in_value * (1 << 10) + out_value / 2) / out_value;
if (align_corners && out_value > 1) {
*scale = ((in_value - 1) * (1 << 10) + (out_value - 1) / 2) / (out_value - 1);
@ -133,6 +136,9 @@ void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int3
void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
int32_t *nearest) {
+ if (new_size == 0) {
+ return;
+ }
*nearest = (in_size * pos) / new_size;
if (align_corners) {
*nearest = ((in_size - 1) * pos + (new_size - 1) / 2) / (new_size - 1);

View File

@ -20,11 +20,11 @@
void Squeeze(int8_t **inputs, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, size_t osize) {
float output_scale = quant_Squeeze_parm->out_quant_args_.scale_;
- float output_inverse_scale = 1.f / output_scale;
+ const float output_inverse_scale = 1.f / output_scale;
QuantArg *input_quant = quant_Squeeze_parm->in_quant_args_;
int output_zp = quant_Squeeze_parm->out_quant_args_.zp_;
- int i = 0;
+ const int i = 0;
int8_t *input_ptr = inputs[0];
for (int j = task_id; j < osize; j += para_->op_parameter_.thread_num_) {
float scale = input_quant[i].scale_ * output_inverse_scale;

View File

@ -21,6 +21,9 @@
void PackWeightFp32(float *weight_data, ConvParameter *conv_param, float *packed_weight, int oc_block,
int oc_block_num) {
// original weight format : ohwi
+ if (oc_block_num == 0) {
+ return;
+ }
int kernel_h = conv_param->kernel_h_;
int kernel_w = conv_param->kernel_w_;
int in_channel = conv_param->input_channel_;
@ -30,7 +33,7 @@ void PackWeightFp32(float *weight_data, ConvParameter *conv_param, float *packed
int pack_weight_size = oc_block * oc_block_num * ic4 * C4NUM * kernel_plane;
int unit_size = oc_block * C4NUM;
- int block_size = pack_weight_size / oc_block_num;
+ const int block_size = pack_weight_size / oc_block_num;
for (int m = 0; m < kernel_plane; m++) {
int kernel_plane_stride = m * in_channel;

View File

@ -19,6 +19,9 @@
#include "nnacl/prior_box.h"
int PriorBox(const float *input_data, float *output_data, const size_t size, const int tid, const int thread_num) {
+ if (thread_num == 0) {
+ return NNACL_ERR;
+ }
size_t unit_size = size / thread_num;
if (tid == thread_num - 1) {
size_t tail_size = size - unit_size * tid;

View File

@ -26,7 +26,7 @@ int Find(float *array, int len, float target) {
}
void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1) {
- output0_len = 0;
+ *output0_len = 0;
for (int i = 0; i < input_len; i++) {
int idx = Find(output0, *output0_len, input[i]);
if (idx != -1) {
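The hunk above is also a genuine bug fix: output0_len = 0 only nulls the local copy of the out-parameter, so the later *output0_len accesses dereference a null pointer, while *output0_len = 0 zeroes the count the caller sees. A self-contained sketch of the intended out-parameter usage (hypothetical CollectUnique, not the library function):

// Appends the unique values from input to output0 and reports how many were kept.
void CollectUnique(const float *input, int input_len, float *output0, int *output0_len) {
  *output0_len = 0;  // write through the out-parameter; "output0_len = 0" would only null the local pointer
  for (int i = 0; i < input_len; ++i) {
    bool seen = false;
    for (int j = 0; j < *output0_len; ++j) {
      if (output0[j] == input[i]) {
        seen = true;
        break;
      }
    }
    if (!seen) {
      output0[(*output0_len)++] = input[i];
    }
  }
}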

View File

@ -28,7 +28,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
int pad_w = conv_param->pad_w_;
int input_h = conv_param->input_h_;
int input_w = conv_param->input_w_;
+ if (out_w_block_num == 0) {
+ return;
+ }
for (int c = 0; c < cal_num; c++) { // actual tiled number
int src_x_s = (out_tile_index % out_w_block_num) * output_unit - pad_w;
int src_y_s = (out_tile_index / out_w_block_num) * output_unit - pad_h;
@ -83,7 +85,9 @@ void WinogradOutputTransform(const float *gemm_out, float *tmp_out_data, const f
int output_channel = conv_param->output_channel_;
int oc4 = UP_DIV(output_channel, C4NUM);
int input_unit = conv_param->input_unit_;
+ if (output_unit_num == 0) {
+ return;
+ }
for (int i = 0; i < cal_num; i++) {
int dst_x_s = out_tile_index % output_unit_num;
int dst_y_s = out_tile_index / output_unit_num;
@ -281,7 +285,9 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa
int pad_h = conv_param->pad_h_;
int ic4 = UP_DIV(input_channel, C4NUM);
int input_unit = 4;
+ if (out_w_block == 0) {
+ return;
+ }
for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
int x_id = start_index + cal_id;
int origin_x = (x_id % out_w_block) * OUPUT_UNIT - pad_w;
@ -328,8 +334,11 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa
void Conv3x3Fp32FilterTransform(float *weight_data, float *trans_weight, int iC4, int output_channel, int kernel_plane,
int oc_block) {
- int input_unit = 4;
+ const int input_unit = 4;
int dst_step = iC4 * C4NUM * oc_block;
+ if (oc_block == 0) {
+ return;
+ }
for (int o = 0; o < output_channel; o++) {
int oc_block_num = o / oc_block;
int oc_block_rem = o % oc_block;
@ -485,36 +494,36 @@ void Conv3x3Fp32FilterTransform(float *weight_data, float *trans_weight, int iC4
float dst01 = (local_ptr + 4)[0];
float dst02 = (local_ptr + 8)[0];
- float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
- float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
- float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+ const float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+ const float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+ const float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
- float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
- float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
- float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+ const float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+ const float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+ const float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
float dst30 = (local_ptr + 24)[0];
float dst31 = (local_ptr + 28)[0];
float dst32 = (local_ptr + 32)[0];
float m00 = dst00;
- float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
- float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
+ const float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
+ const float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
float m03 = dst02;
float m10 = dst10;
- float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
- float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
+ const float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
+ const float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
float m13 = dst12;
float m20 = dst20;
- float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
- float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
+ const float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
+ const float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
float m23 = dst22;
float m30 = dst30;
- float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
- float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
+ const float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
+ const float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
float m33 = dst32;
*(dst_ic4_ptr + j * 8) = m00;
@ -652,8 +661,10 @@ void Conv3x3Fp32OutputTransform(const float *gemm_out, float *out_data, const fl
int output_w = conv_param->output_w_;
int output_h = conv_param->output_h_;
int oc4 = UP_DIV(output_channel, C4NUM);
- int input_unit = 4;
+ const int input_unit = 4;
+ if (out_w_block == 0) {
+ return;
+ }
for (int i = 0; i < real_cal_num; i++) {
int out_w_index = (start_index + i) % out_w_block;
int out_h_index = (start_index + i) / out_w_block;
@ -855,9 +866,11 @@ void Conv3x3Uint8InputTransform(const int16_t *input_data, int16_t *trans_input,
int pad_h = conv_param->pad_h_;
ConvQuantArg quant_arg = conv_param->conv_quant_arg_;
int input_zp = quant_arg.input_quant_args_[0].zp_;
- int ic8 = UP_DIV(input_channel, C8NUM);
- int input_unit = 4;
+ const int ic8 = UP_DIV(input_channel, C8NUM);
+ const int input_unit = 4;
+ if (out_w_block == 0) {
+ return;
+ }
for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
int x_id = start_index + cal_id;
int origin_x = (x_id % out_w_block) * OUPUT_UNIT - pad_w;
@ -890,7 +903,7 @@ void Conv3x3Uint8InputTransform(const int16_t *input_data, int16_t *trans_input,
void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weight, int iC8, int output_channel,
int kernel_plane) {
- int input_unit = 4;
+ const int input_unit = 4;
int dst_step = iC8 * C8NUM * C4NUM;
for (int o = 0; o < output_channel; o++) {
int oc4_block_num = o / C4NUM;
@ -1441,9 +1454,11 @@ void Conv3x3Uint8OutputTransform(const int32_t *gemm_out, int8_t *out_data, cons
int output_channel = conv_param->output_channel_;
int output_w = conv_param->output_w_;
int output_h = conv_param->output_h_;
- int oc4 = UP_DIV(output_channel, C4NUM);
- int input_unit = 4;
+ const int oc4 = UP_DIV(output_channel, C4NUM);
+ const int input_unit = 4;
+ if (out_w_block == 0) {
+ return;
+ }
for (int i = 0; i < real_cal_num; i++) {
int out_w_index = (start_index + i) % out_w_block;
int out_h_index = (start_index + i) / out_w_block;

File diff suppressed because it is too large.

View File

@ -49,6 +49,9 @@ __kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t in
float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
+ if (b == 0) {
+ return;
+ }
write_imagef(output, (int2)(X, Y), a / b);
}

View File

@ -249,6 +249,10 @@ int ConvolutionOpenCLKernel::GetGlobalLocal(std::vector<size_t> *global, std::ve
size_t global_c = UP_DIV(UP_DIV(param->output_channel_, C4NUM), work_group_size[2]) * work_group_size[2];
size_t local_c = GetBiggestDivider(global_c, max_z_size);
+ if (local_c == 0) {
+ MS_LOG(ERROR) << "Divide by zero";
+ return RET_ERROR;
+ }
size_t local_hw_size = std::min<size_t>(256, max_work_group_size) / local_c;
size_t local_w = std::min(global_w, local_hw_size);
size_t local_h = std::min(local_hw_size / local_w, global_h);

View File

@ -32,6 +32,10 @@ std::vector<size_t> GetCommonGlobalSize(const std::vector<size_t> &local, const
std::vector<size_t> GetCommonLocalSize(const std::vector<size_t> &global, int max_size) {
size_t wg_z = GetBiggestDividerWithPriority(global[2], 8);
+ if (wg_z == 0) {
+ MS_LOG(ERROR) << "Divide by zero";
+ return {};
+ }
size_t wg_xy_size = max_size / wg_z;
size_t wg_x = std::min(DivideRoundUp(global[0], 2), wg_xy_size);
size_t wg_y = std::min(wg_xy_size / wg_x, global[1]);

View File

@ -130,6 +130,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size)
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
UnLock();
+ delete buffer;
return nullptr;
}
device_ptr = static_cast<void *>(buffer);
@ -138,6 +139,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size)
if (host_ptr == nullptr) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
UnLock();
+ return nullptr;
}
cl::Memory *mem = buffer;
@ -187,6 +189,7 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
UnLock();
+ delete buffer;
return nullptr;
}
device_ptr = static_cast<void *>(buffer);

View File

@ -195,7 +195,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
}
desc.arch = kernel::KERNEL_ARCH::kCPU;
- kernel::LiteKernel *kernel;
+ kernel::LiteKernel *kernel = nullptr;
if (data_type == kNumberTypeFloat32) {
// check if support fp16
kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};

View File

@ -25,6 +25,10 @@ std::shared_ptr<ModelImpl> Import(const char *model_buf, size_t size) {
return nullptr;
}
// todo hangangqiang remove when copy primitive done
+ if (size <= 0) {
+ MS_LOG(ERROR) << "size is zero";
+ return nullptr;
+ }
auto *inner_buf = new char[size];
memcpy(inner_buf, model_buf, size);
auto meta_graph = schema::GetMetaGraph(inner_buf);

View File

@ -462,7 +462,7 @@ void BenchmarkFlags::InitInputDataList() {
char *cur_input;
const char *split_c = ",";
cur_input = strtok(input_list, split_c);
- while (cur_input) {
+ while (cur_input != nullptr) {
input_data_list.emplace_back(cur_input);
cur_input = strtok(nullptr, split_c);
}

View File

@ -117,9 +117,9 @@ STATUS OnnxEltwiseParser::Parse(const onnx::GraphProto &onnx_graph,
std::unique_ptr<schema::EltwiseT> attr(new schema::EltwiseT());
if (onnx_node.op_type() == "Prod") {
attr->mode = schema::EltwiseMode_PROD;
} else if (onnx_node.op_type() == "Prod") {
attr->mode = schema::EltwiseMode_SUM;
} else if (onnx_node.op_type() == "Sum") {
attr->mode = schema::EltwiseMode_SUM;
} else if (onnx_node.op_type() == "Maximum") {
attr->mode = schema::EltwiseMode_MAXIMUM;
}
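Another behavioural fix: the old code tested op_type() == "Prod" in both of the first two branches, so the branch meant to map "Sum" to EltwiseMode_SUM could never be taken and Sum nodes fell through unhandled; the second branch now tests "Sum". A simplified sketch of the corrected dispatch, using a local enum as a stand-in for schema::EltwiseMode:

#include <string>

// Simplified stand-in for schema::EltwiseMode.
enum EltwiseMode { EltwiseMode_PROD, EltwiseMode_SUM, EltwiseMode_MAXIMUM, EltwiseMode_UNKNOWN };

EltwiseMode MapOnnxEltwise(const std::string &op_type) {
  if (op_type == "Prod") {
    return EltwiseMode_PROD;
  } else if (op_type == "Sum") {  // previously this branch repeated the "Prod" test
    return EltwiseMode_SUM;
  } else if (op_type == "Maximum") {
    return EltwiseMode_MAXIMUM;
  }
  return EltwiseMode_UNKNOWN;
}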

View File

@ -166,6 +166,7 @@ const void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kerne
auto data_size = kernel_num * kernel_size * sizeof(float);
if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
MS_LOG(EXCEPTION) << "memset newWeightData failed";
+ delete[] tmp_weight_data;
return;
}