forked from mindspore-Ecosystem/mindspore
!4387 fix coding specification according to cppcheck report
Merge pull request !4387 from 徐安越/master
commit 05b03fe017
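Most hunks in this change apply a few recurring cppcheck findings: compare pointers explicitly against nullptr, const-qualify locals that never change, initialize pointers at declaration, and guard divisions by zero. A minimal standalone sketch of the style (illustrative names, not MindSpore code):

```cpp
// Illustrative sketch of the cppcheck-driven style; not project code.
int SumValues(const int *data, int len) {
  const int stride = 1;      // const: never reassigned after initialization
  const int *cur = nullptr;  // pointers initialized at declaration
  int total = 0;
  if (data != nullptr) {     // explicit nullptr comparison, not `if (data)`
    for (int i = 0; i < len; i += stride) {
      cur = data + i;
      total += *cur;
    }
  }
  return total;
}
```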
@@ -31,7 +31,7 @@ int Power::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::
   }
   auto output_tensor = outputs[0];
   MS_ASSERT(output_tensor != nullptr);
-  if (exp_tensor) {
+  if (exp_tensor != nullptr) {
     if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) {
       MS_LOG(ERROR) << "Power inputs shape or type is not equal!";
       return RET_INPUT_TENSOR_ERROR;
@@ -48,7 +48,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
     case kNumberTypeFloat32: {
       kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive);
-      if (!kernel) {
+      if (kernel == nullptr) {
         MS_LOG(ERROR) << "kernel is nullptr.";
         return nullptr;
       }
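The kernel-creator hunk above checks the result of `new (std::nothrow)`, which returns nullptr on allocation failure instead of throwing, so the explicit null comparison is load-bearing. A minimal sketch of the pattern (illustrative class, not the project's type):

```cpp
#include <new>
#include <iostream>

struct MatmulKernel { float scratch[1 << 20]; };  // illustrative stand-in

MatmulKernel *CreateKernel() {
  // new (std::nothrow) yields nullptr on failure rather than
  // throwing std::bad_alloc, so this check can actually fire.
  auto *kernel = new (std::nothrow) MatmulKernel();
  if (kernel == nullptr) {
    std::cerr << "kernel is nullptr." << std::endl;
    return nullptr;
  }
  return kernel;
}
```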
@@ -97,8 +97,8 @@ int Convolution3x3FP16CPUKernel::InitWeightBias() {
 }

 int Convolution3x3FP16CPUKernel::InitTmpBuffer() {
-  int tile_num = 16;
-  int k_plane = 36;
+  const int tile_num = 16;
+  const int k_plane = 36;
   int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
   int oC8 = UP_DIV(conv_param_->output_channel_, C8NUM);
@@ -261,7 +261,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten
   conv_param->input_w_ = inputs.front()->Width();
   conv_param->output_h_ = outputs.front()->Height();
   conv_param->output_w_ = outputs.front()->Width();
-  bool use_winograd;
+  bool use_winograd = false;
   int out_unit;
   InputTransformUnitFunc input_trans_func = nullptr;
   OutputTransformUnitFunc output_trans_func = nullptr;
@@ -61,7 +61,7 @@ int Convolution3x3CPUKernel::InitWeightBias() {
   oc_block = C8NUM;
   oc_block_num = UP_DIV(output_channel, C8NUM);
 #endif
-  int k_plane = 16;
+  const int k_plane = 16;
   // init weight
   size_t transformed_size = iC4 * C4NUM * oc_block_num * oc_block * k_plane * sizeof(float);
   transformed_filter_addr_ = reinterpret_cast<float *>(malloc(transformed_size));
@@ -93,7 +93,7 @@ int Convolution3x3CPUKernel::InitWeightBias() {
 int Convolution3x3CPUKernel::InitTmpBuffer() {
   int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
   int oC4 = UP_DIV(conv_param_->output_channel_, C4NUM);
-  int k_plane = 16;
+  const int k_plane = 16;

   /*=============================tile_buffer_============================*/
   size_t tile_buffer_size = thread_count_ * TILE_NUM * k_plane * iC4 * C4NUM * sizeof(float);
@@ -52,6 +52,10 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int
   std::vector<int> strides = trans_weight->GetStride();

   int kernel_plane_stride = channel_in;
+  if (oc_block == 0) {
+    MS_LOG(ERROR) << "Divide by zero";
+    return;
+  }
   for (int i = 0; i < channel_out; i++) {
     int out_c_block = i / oc_block;
     int out_c_res = i % oc_block;
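The guard added above recurs in many hunks below: bail out before a division or modulo whose divisor comes from the caller. A minimal sketch of the pattern (function and names are illustrative):

```cpp
#include <cstdio>

// Illustrative sketch: `i / oc_block` and `i % oc_block` are undefined
// behavior when oc_block == 0, so return before entering the loop.
void SplitChannels(int channel_out, int oc_block) {
  if (oc_block == 0) {
    fprintf(stderr, "Divide by zero\n");  // stands in for MS_LOG(ERROR)
    return;
  }
  for (int i = 0; i < channel_out; i++) {
    int block = i / oc_block;
    int rem = i % oc_block;
    printf("%d -> block %d, rem %d\n", i, block, rem);
  }
}
```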
@@ -84,7 +84,7 @@ int LstmCPUKernel::InitWeightBias() {
   }

   auto bias_data = reinterpret_cast<float *>(in_tensors_.at(3)->Data());
-  int state_bias_offset = 4 * lstm_parm_->hidden_size_;
+  const int state_bias_offset = 4 * lstm_parm_->hidden_size_;
   for (int i = 0; i < state_bias_offset; i++) {
     bias_ptr_[i] = bias_data[i] + bias_data[i + state_bias_offset];
   }
@@ -66,7 +66,7 @@ int PowerCPUKernel::RunImpl(int task_id) {
     exp_addr = reinterpret_cast<float *>(in_tensors_[1]->Data());
     broadcast = false;
   }
-  float *cur_exp;
+  float *cur_exp = nullptr;
   if (broadcast) {
     cur_exp = &power_;
   } else {
@@ -161,7 +161,7 @@ int SqueezeInt8CPUKernel::Run() {
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: ";
   }
-  return RET_OK;
+  return ret;
 }

 int SqueezeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
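The hunk above swaps an unconditional RET_OK for the actual status so callers can see the failure. A minimal sketch of the before/after behavior (names are illustrative stand-ins):

```cpp
#include <cstdio>

constexpr int RET_OK = 0;
constexpr int RET_ERROR = 1;

int DoWork() { return RET_ERROR; }  // stand-in for RunSqueezeParam

// Before the fix, the error was logged but swallowed and the caller
// always saw RET_OK; after the fix, the real status propagates.
int Run() {
  int ret = DoWork();
  if (ret != RET_OK) {
    fprintf(stderr, "RunSqueezeParam failed. errorcode: %d\n", ret);
  }
  return ret;  // was `return RET_OK;`
}
```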
@@ -35,7 +35,7 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
 void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weight, float16_t *bias, size_t step,
                            size_t ic4, size_t out_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
                            size_t relu6) {
-  int tile_n = 16;
+  const int tile_n = 16;
   for (int i = 0; i < out_channel; i++) {
     int oc8_block = i / 8;
     int oc8_res = i % 8;
@@ -76,7 +76,7 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
 void IndirectGemmFp16_16x8_tmp(float16_t *output, float16_t *input, float16_t *weight, const float16_t *bias,
                                size_t step, size_t ic4, size_t output_channel, size_t offset, size_t mode,
                                size_t writeC4, size_t relu, size_t relu6) {
-  int tile_num = 16;
+  const int tile_num = 16;
   if (mode) {
     for (int i = 0; i < tile_num; i++) {
       int input_tile_offset = i * C4NUM;
@@ -175,8 +175,8 @@ void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16
   // todo
   int thread_count = conv_param->thread_num_;
   int tile_num = 16;
-  int output_unit = 4;
-  int k_plane = 36;
+  const int output_unit = 4;
+  const int k_plane = 36;
   int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
   int oc8 = UP_DIV(conv_param->output_channel_, C8NUM);
@@ -190,14 +190,16 @@ void Conv3x3Fp16InputUnit(float16_t *tmp_data, float16_t *trans_input_data, size
 void Conv3x3Fp16InputTransform(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data,
                                int start_index, int real_cal_num, int out_w_block, ConvParameter *conv_param) {
   // input data format : nhwc
-  int output_unit = 4;
+  const int output_unit = 4;
   int input_channel = conv_param->input_channel_;
   int input_width = conv_param->input_w_;
   int input_height = conv_param->input_h_;
   int pad_w = conv_param->pad_w_;
   int pad_h = conv_param->pad_h_;
   int ic4 = UP_DIV(input_channel, C4NUM);
-
+  if (out_w_block == 0) {
+    return;
+  }
   for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
     int x_id = start_index + cal_id;
     int origin_x = (x_id % out_w_block) * output_unit - pad_w;
@@ -511,7 +513,9 @@ void Conv3x3Fp16OutputTransform(const float16_t *gemm_out, float16_t *out_data,
   int output_h = conv_param->output_h_;
   int out_h_block = UP_DIV(output_h, C4NUM);
   int oc8 = UP_DIV(output_channel, C8NUM);
-
+  if (out_w_block == 0) {
+    return;
+  }
   for (int i = 0; i < real_cal_num; i++) {
     int out_w_index = (start_index + i) % out_w_block;
     int out_h_index = (start_index + i) / out_w_block;
@@ -65,6 +65,9 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr

 void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_ptr, size_t output_channel,
                       size_t plane_size, size_t stride, bool is_relu, bool is_relu6, int size) {
+  if (size == 0) {
+    return;
+  }
   for (int oc = 0; oc < output_channel; oc++) {
     int oc_div = oc / size, oc_mod = oc % size;
     for (int hw = 0; hw < plane_size; hw++) {
@@ -142,7 +142,7 @@ void ConvSWFp32(const float *input_data, const float *packed_weight, const float
   int ic4 = slidingWindow_param->ic4_channel_ / C4NUM;
   int oc4_res = conv_param->output_channel_ % C4NUM;
   const float *src = input_data;
-  float *dst;
+  float *dst = NULL;
   if (oc4_res == 0) {
     dst = output_data;
   } else {
@@ -328,36 +328,36 @@ void ConvDw3x3Fp32FilterTrans(float *trans_weight, float *weight, int oc4) {
       float dst01 = (local_ptr + 4)[0];
       float dst02 = (local_ptr + 8)[0];

-      float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
-      float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
-      float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+      const float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+      const float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+      const float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

-      float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
-      float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
-      float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+      const float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+      const float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+      const float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

       float dst30 = (local_ptr + 24)[0];
       float dst31 = (local_ptr + 28)[0];
       float dst32 = (local_ptr + 32)[0];

       float m00 = dst00;
-      float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
-      float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
+      const float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
+      const float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
       float m03 = dst02;

       float m10 = dst10;
-      float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
-      float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
+      const float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
+      const float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
       float m13 = dst12;

       float m20 = dst20;
-      float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
-      float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
+      const float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
+      const float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
       float m23 = dst22;

       float m30 = dst30;
-      float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
-      float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
+      const float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
+      const float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
       float m33 = dst32;

       *(dst + j) = m00;
@@ -387,7 +387,7 @@ void ConvDw3x3Fp32FilterTrans(float *trans_weight, float *weight, int oc4) {
 void ConvDw3x3Fp32InputTrans(const float *input_data, float *trans_input, float *block_buffer, int out_h_block,
                              int out_w_block, const ConvParameter *conv_param) {
   int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
-  int input_unit = 4;
+  const int input_unit = 4;
   memset(trans_input, 0, out_h_block * out_h_block * 16 * C4NUM * sizeof(float));

   for (int oh = 0; oh < out_h_block; oh++) {
@@ -426,7 +426,7 @@ void ConvDw3x3Fp32InputTrans(const float *input_data, float *trans_input, float

 // todo yangruoqi: implement assembly
 void ConvDw3x3Fp32Winograd(float *trans_buffer, const float *weight, int out_h_block, int out_w_block) {
-  int unit = 4;
+  const int unit = 4;
   for (int oh = 0; oh < out_h_block; oh++) {
     float *buf_oh = trans_buffer + oh * out_w_block * 16 * C4NUM;
     for (int ow = 0; ow < out_w_block; ow++) {
@@ -583,7 +583,7 @@ void ConvDw3x3Fp32OutputTrans(float *trans_buffer, float *output_data, const flo
   int oc4 = UP_DIV(conv_param->output_channel_, C4NUM);
   bool h_in_range = true;
   for (int oh = 0; oh < out_h_block; oh++) {
-    int real_oh = 2 * oh;
+    const int real_oh = 2 * oh;
     if ((oh + 1) * 2 > conv_param->output_h_) {
       h_in_range = false;
     }
@@ -592,7 +592,7 @@ void ConvDw3x3Fp32OutputTrans(float *trans_buffer, float *output_data, const flo
     float *output_oh = output_data + real_oh * conv_param->output_w_ * oc4 * C4NUM;

     for (int ow = 0; ow < out_w_block; ow++) {
-      int real_ow = 2 * ow;
+      const int real_ow = 2 * ow;
       if ((ow + 1) * 2 > conv_param->output_w_) {
         w_in_range = false;
       }
@@ -47,13 +47,13 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input
     int y_bottom = (int)(floor(actual_y));
     int y_top = y_bottom + 1 < in_h ? (y_bottom + 1) : (in_h - 1);
     float y_top_weight = actual_y - (float)(y_bottom);
-    float y_bottom_weight = 1.0f - y_top_weight;
+    const float y_bottom_weight = 1.0f - y_top_weight;
     for (w = 0; w < new_width; w++) {
       float actual_x = (float)(w)*width_scale;
       int x_left = (int)(floor(actual_x));
       int x_right = x_left + 1 < in_w ? (x_left + 1) : (in_w - 1);
       float x_right_weight = actual_x - (float)(x_left);
-      float x_left_weight = 1.0f - x_right_weight;
+      const float x_left_weight = 1.0f - x_right_weight;
       c = 0;
 #ifdef ENABLE_NEON
       for (; c <= in_c - 4; c += 4) {
@@ -30,7 +30,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, const int *in_shape, c
   int pooled_width = param->pooledW_;
   int in_stride[DIMENSION_4D];
   int out_stride[DIMENSION_4D];
-  int roi_stride = 5;
+  const int roi_stride = 5;
   in_stride[DIMENSION_4D - 1] = 1;
   out_stride[DIMENSION_4D - 1] = 1;
   for (int i = dim - 2; i >= 0; --i) {
@@ -138,7 +138,7 @@ void DoPadding(const float *input, float *padded_input, SpaceToBatchParameter pa
 }

 int SpaceToBatch(const float *input, float *output, SpaceToBatchParameter param, float *tmp_space[3]) {
-  float *padded_input;
+  float *padded_input = NULL;
   int ret;
   if (param.need_paddings_) {
     if (tmp_space[0] == NULL || tmp_space[1] == NULL || tmp_space[2] == NULL) {
@@ -30,7 +30,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
   int output_h = pooling_param->output_h_;
   int output_batch = pooling_param->output_batch_;

-  const float *inPtr;
+  const float *inPtr = NULL;
   for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;

   // int pad_top = padding[2];
@@ -119,7 +119,7 @@ void MaxPoolingGrad(const float *dy, const int *indices, float *output_ptr, Pool

   const float *yt = (const float *)(dy);
   const int *pos = (const int *)(indices);
-  float *out;
+  float *out = NULL;

   if (1) { // grads->layout() == Tensor::nhwc)
     for (int ib = 0; ib < output_batch; ib++) {
@@ -34,7 +34,7 @@ void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_co
 void DoArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count,
                       int axis_count, int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
   bool out_value = param->out_value_;
-  float output_inverse_scale = 1.f / out_quant_arg->scale_;
+  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
   float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
   int32_t output_zp = out_quant_arg->zp_;
   for (int i = 0; i < pre_axis_count; ++i) {
@@ -28,7 +28,7 @@ void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const in
   size_t output_offset = 0;
   size_t in_stride_h = in_w * in_c;
   size_t in_stride_n = in_stride_h * in_h;
-  float output_inverse_scale = 1.f / out_quant_arg->scale_;
+  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
   float scale = in_quant_arg->scale_ * output_inverse_scale;
   float bias = -in_quant_arg->zp_ * scale;
   int32_t output_zp = out_quant_arg->zp_;
@@ -76,7 +76,7 @@ void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_
   size_t in_stride_h = in_w * in_c;
   size_t in_stride_n = in_stride_h * in_h;

-  float output_inverse_scale = 1.f / out_quant_arg->scale_;
+  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
   float scale = in_quant_arg->scale_ * output_inverse_scale;
   float bias = -in_quant_arg->zp_ * scale;
   int32_t output_zp = out_quant_arg->zp_;
@@ -20,7 +20,7 @@

 void Int8Concat(int8_t **inputs, int8_t *output, ConcatParameter *para, int axis, int64_t real_dst_count, int task_id) {
   float output_scale = para->quant_arg_.out_args_.scale_;
-  float output_inverse_scale = 1.f / output_scale;
+  const float output_inverse_scale = 1.f / output_scale;
   int input_num = para->input_num_;
   int count_unit_ = para->count_unit_;
   int after_axis_size = para->after_axis_size;
@@ -201,7 +201,7 @@ void Conv3x3Uint8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, i
 #ifdef ENABLE_ARM
   IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t));
 #else
-  int input_unit_square = 16;
+  const int input_unit_square = 16;
   for (int c = 0; c < oc4; c++) {
     int filter_oc_offset = c * input_unit_square * ic8 * C8NUM * C4NUM;
     int dst_oc_offset = c * input_unit_square * C4NUM;
@@ -22,7 +22,7 @@ void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, int *in_shape,
   int32_t in_shape_dim2 = in_shape[2];
   int32_t in_shape_dim1 = in_shape[1];
   size_t copy_size = block_size * param->out_stride_dim2_;
-  float output_inverse_scale = 1.f / out_quant_arg->scale_;
+  const float output_inverse_scale = 1.f / out_quant_arg->scale_;
   float scale = in_quant_arg->scale_ * output_inverse_scale;
   float bias = -in_quant_arg->zp_ * scale;
   int32_t output_zp = out_quant_arg->zp_;
@@ -36,8 +36,8 @@ void AvgPoolingInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParamete
   float output_scale = pooling_param->quant_args_[1][0].scale_;
   int output_zp = pooling_param->quant_args_[1][0].zp_;
   double real_multiplier = input_scale / output_scale;
-  int8_t out_min = INT8_MIN;
-  int8_t out_max = INT8_MAX;
+  const int8_t out_min = INT8_MIN;
+  const int8_t out_max = INT8_MAX;

   for (int batch = 0; batch < output_batch; batch++) {
     int in_batch_offset = batch * in_h * in_w * channel;
@@ -91,8 +91,8 @@ void AvgPoolingOptInt8(const int8_t *input_ptr, int8_t *output_ptr, PoolingParam
   int out_tile_count = UP_DIV(out_plane, TILE_NUM);
   int thread_num = pooling_param->thread_num_;
   int c8 = UP_DIV(channel, C8NUM);
-  int8_t out_min = INT8_MIN;
-  int8_t out_max = INT8_MAX;
+  const int8_t out_min = INT8_MIN;
+  const int8_t out_max = INT8_MAX;

   for (int batch = 0; batch < output_batch; batch++) {
     int in_batch_offset = batch * in_h * in_w * channel;
@@ -20,7 +20,7 @@
 void prelu(int8_t *inputs, int8_t *output_ptr, PreluParameter *quant_prelu_parm, int task_id) {
   float output_scale = quant_prelu_parm->quant_arg.out_args_.scale_;
   int output_zp = quant_prelu_parm->quant_arg.out_args_.zp_;
-  float output_inverse_scale = 1.f / output_scale;
+  const float output_inverse_scale = 1.f / output_scale;
   int output_dim = quant_prelu_parm->input_dim_;

   QuantArg *input_quant = NULL;
@@ -22,7 +22,7 @@ void Int8Reshape(int8_t *input_ptr, int8_t *output_ptr, int64_t real_dst_count,
   if (para.in_args_.scale_ == para.out_args_.scale_ && para.in_args_.zp_ == para.out_args_.zp_) {
     memcpy(output_ptr, input_ptr, real_dst_count);
   } else {
-    float output_inverse_scale = 1.f / para.out_args_.scale_;
+    const float output_inverse_scale = 1.f / para.out_args_.scale_;
     float scale = para.in_args_.scale_ * output_inverse_scale;
     float bias = -para.in_args_.zp_ * scale;
     int32_t output_zp = para.out_args_.zp_;
@@ -115,6 +115,9 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
 }

 void ComputeScale(const int32_t in_value, const int32_t out_value, const bool align_corners, int32_t *scale) {
+  if (out_value == 0) {
+    return;
+  }
   *scale = (in_value * (1 << 10) + out_value / 2) / out_value;
   if (align_corners && out_value > 1) {
     *scale = ((in_value - 1) * (1 << 10) + (out_value - 1) / 2) / (out_value - 1);
@@ -133,6 +136,9 @@ void ComputeInterpolationArgs(const int32_t pos, const int32_t scale, const int3

 void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
                                int32_t *nearest) {
+  if (new_size == 0) {
+    return;
+  }
   *nearest = (in_size * pos) / new_size;
   if (align_corners) {
     *nearest = ((in_size - 1) * pos + (new_size - 1) / 2) / (new_size - 1);
@@ -20,11 +20,11 @@
 void Squeeze(int8_t **inputs, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
              SqueezeParameter *para_, size_t osize) {
   float output_scale = quant_Squeeze_parm->out_quant_args_.scale_;
-  float output_inverse_scale = 1.f / output_scale;
+  const float output_inverse_scale = 1.f / output_scale;
   QuantArg *input_quant = quant_Squeeze_parm->in_quant_args_;
   int output_zp = quant_Squeeze_parm->out_quant_args_.zp_;

-  int i = 0;
+  const int i = 0;
   int8_t *input_ptr = inputs[0];
   for (int j = task_id; j < osize; j += para_->op_parameter_.thread_num_) {
     float scale = input_quant[i].scale_ * output_inverse_scale;
@@ -21,6 +21,9 @@
 void PackWeightFp32(float *weight_data, ConvParameter *conv_param, float *packed_weight, int oc_block,
                     int oc_block_num) {
   // original weight format : ohwi
+  if (oc_block_num == 0) {
+    return;
+  }
   int kernel_h = conv_param->kernel_h_;
   int kernel_w = conv_param->kernel_w_;
   int in_channel = conv_param->input_channel_;
@@ -30,7 +33,7 @@ void PackWeightFp32(float *weight_data, ConvParameter *conv_param, float *packed
   int pack_weight_size = oc_block * oc_block_num * ic4 * C4NUM * kernel_plane;

   int unit_size = oc_block * C4NUM;
-  int block_size = pack_weight_size / oc_block_num;
+  const int block_size = pack_weight_size / oc_block_num;

   for (int m = 0; m < kernel_plane; m++) {
     int kernel_plane_stride = m * in_channel;
@@ -19,6 +19,9 @@
 #include "nnacl/prior_box.h"

 int PriorBox(const float *input_data, float *output_data, const size_t size, const int tid, const int thread_num) {
+  if (thread_num == 0) {
+    return NNACL_ERR;
+  }
   size_t unit_size = size / thread_num;
   if (tid == thread_num - 1) {
     size_t tail_size = size - unit_size * tid;
@@ -26,7 +26,7 @@ int Find(float *array, int len, float target) {
 }

 void Unique(float *input, int input_len, float *output0, int *output0_len, int *output1) {
-  output0_len = 0;
+  *output0_len = 0;
   for (int i = 0; i < input_len; i++) {
     int idx = Find(output0, *output0_len, input[i]);
     if (idx != -1) {
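The Unique hunk above is a genuine bug fix, not just style: assigning to the parameter `output0_len` only rewrote the local copy of the pointer, leaving the caller's counter uninitialized. A minimal sketch of the difference (illustrative names):

```cpp
#include <cstdio>

// Broken: reassigns the local pointer; the caller's counter is never set.
void CountBroken(int *out_len) {
  out_len = 0;  // nulls the parameter itself, writes nothing through it
}

// Fixed: dereference to write through the pointer.
void CountFixed(int *out_len) {
  *out_len = 0;  // the caller's int is now initialized
}

int main() {
  int len = -1;
  CountFixed(&len);
  printf("%d\n", len);  // prints 0
  return 0;
}
```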
@@ -28,7 +28,9 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
   int pad_w = conv_param->pad_w_;
   int input_h = conv_param->input_h_;
   int input_w = conv_param->input_w_;
-
+  if (out_w_block_num == 0) {
+    return;
+  }
   for (int c = 0; c < cal_num; c++) {  // actual tiled number
     int src_x_s = (out_tile_index % out_w_block_num) * output_unit - pad_w;
     int src_y_s = (out_tile_index / out_w_block_num) * output_unit - pad_h;
@@ -83,7 +85,9 @@ void WinogradOutputTransform(const float *gemm_out, float *tmp_out_data, const f
   int output_channel = conv_param->output_channel_;
   int oc4 = UP_DIV(output_channel, C4NUM);
   int input_unit = conv_param->input_unit_;
-
+  if (output_unit_num == 0) {
+    return;
+  }
   for (int i = 0; i < cal_num; i++) {
     int dst_x_s = out_tile_index % output_unit_num;
     int dst_y_s = out_tile_index / output_unit_num;
@@ -281,7 +285,9 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa
   int pad_h = conv_param->pad_h_;
   int ic4 = UP_DIV(input_channel, C4NUM);
   int input_unit = 4;
-
+  if (out_w_block == 0) {
+    return;
+  }
   for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
     int x_id = start_index + cal_id;
     int origin_x = (x_id % out_w_block) * OUPUT_UNIT - pad_w;
@@ -328,8 +334,11 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa

 void Conv3x3Fp32FilterTransform(float *weight_data, float *trans_weight, int iC4, int output_channel, int kernel_plane,
                                 int oc_block) {
-  int input_unit = 4;
+  const int input_unit = 4;
   int dst_step = iC4 * C4NUM * oc_block;
+  if (oc_block == 0) {
+    return;
+  }
   for (int o = 0; o < output_channel; o++) {
     int oc_block_num = o / oc_block;
     int oc_block_rem = o % oc_block;
@@ -485,36 +494,36 @@ void Conv3x3Fp32FilterTransform(float *weight_data, float *trans_weight, int iC4
       float dst01 = (local_ptr + 4)[0];
       float dst02 = (local_ptr + 8)[0];

-      float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
-      float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
-      float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+      const float dst10 = 0.5f * local_ptr[0] + 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+      const float dst11 = 0.5f * (local_ptr + 4)[0] + 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+      const float dst12 = 0.5f * (local_ptr + 8)[0] + 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

-      float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
-      float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
-      float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];
+      const float dst20 = 0.5f * local_ptr[0] - 0.5f * (local_ptr + 12)[0] + 0.5f * (local_ptr + 24)[0];
+      const float dst21 = 0.5f * (local_ptr + 4)[0] - 0.5f * (local_ptr + 16)[0] + 0.5f * (local_ptr + 28)[0];
+      const float dst22 = 0.5f * (local_ptr + 8)[0] - 0.5f * (local_ptr + 20)[0] + 0.5f * (local_ptr + 32)[0];

       float dst30 = (local_ptr + 24)[0];
       float dst31 = (local_ptr + 28)[0];
       float dst32 = (local_ptr + 32)[0];

       float m00 = dst00;
-      float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
-      float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
+      const float m01 = 0.5f * dst00 + 0.5f * dst01 + 0.5f * dst02;
+      const float m02 = 0.5f * dst00 - 0.5f * dst01 + 0.5f * dst02;
       float m03 = dst02;

       float m10 = dst10;
-      float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
-      float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
+      const float m11 = 0.5f * dst10 + 0.5f * dst11 + 0.5f * dst12;
+      const float m12 = 0.5f * dst10 - 0.5f * dst11 + 0.5f * dst12;
       float m13 = dst12;

       float m20 = dst20;
-      float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
-      float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
+      const float m21 = 0.5f * dst20 + 0.5f * dst21 + 0.5f * dst22;
+      const float m22 = 0.5f * dst20 - 0.5f * dst21 + 0.5f * dst22;
       float m23 = dst22;

       float m30 = dst30;
-      float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
-      float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
+      const float m31 = 0.5f * dst30 + 0.5f * dst31 + 0.5f * dst32;
+      const float m32 = 0.5f * dst30 - 0.5f * dst31 + 0.5f * dst32;
       float m33 = dst32;

       *(dst_ic4_ptr + j * 8) = m00;
@@ -652,8 +661,10 @@ void Conv3x3Fp32OutputTransform(const float *gemm_out, float *out_data, const fl
   int output_w = conv_param->output_w_;
   int output_h = conv_param->output_h_;
   int oc4 = UP_DIV(output_channel, C4NUM);
-  int input_unit = 4;
-
+  const int input_unit = 4;
+  if (out_w_block == 0) {
+    return;
+  }
   for (int i = 0; i < real_cal_num; i++) {
     int out_w_index = (start_index + i) % out_w_block;
     int out_h_index = (start_index + i) / out_w_block;
@@ -855,9 +866,11 @@ void Conv3x3Uint8InputTransform(const int16_t *input_data, int16_t *trans_input,
   int pad_h = conv_param->pad_h_;
   ConvQuantArg quant_arg = conv_param->conv_quant_arg_;
   int input_zp = quant_arg.input_quant_args_[0].zp_;
-  int ic8 = UP_DIV(input_channel, C8NUM);
-  int input_unit = 4;
-
+  const int ic8 = UP_DIV(input_channel, C8NUM);
+  const int input_unit = 4;
+  if (out_w_block == 0) {
+    return;
+  }
   for (int cal_id = 0; cal_id < real_cal_num; cal_id++) {
     int x_id = start_index + cal_id;
     int origin_x = (x_id % out_w_block) * OUPUT_UNIT - pad_w;
@@ -890,7 +903,7 @@ void Conv3x3Uint8InputTransform(const int16_t *input_data, int16_t *trans_input,

 void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weight, int iC8, int output_channel,
                                 int kernel_plane) {
-  int input_unit = 4;
+  const int input_unit = 4;
   int dst_step = iC8 * C8NUM * C4NUM;
   for (int o = 0; o < output_channel; o++) {
     int oc4_block_num = o / C4NUM;
@@ -1441,9 +1454,11 @@ void Conv3x3Uint8OutputTransform(const int32_t *gemm_out, int8_t *out_data, cons
   int output_channel = conv_param->output_channel_;
   int output_w = conv_param->output_w_;
   int output_h = conv_param->output_h_;
-  int oc4 = UP_DIV(output_channel, C4NUM);
-  int input_unit = 4;
-
+  const int oc4 = UP_DIV(output_channel, C4NUM);
+  const int input_unit = 4;
+  if (out_w_block == 0) {
+    return;
+  }
   for (int i = 0; i < real_cal_num; i++) {
     int out_w_index = (start_index + i) % out_w_block;
     int out_h_index = (start_index + i) / out_w_block;
File diff suppressed because it is too large.
@@ -49,6 +49,9 @@ __kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t in

   float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
   float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
+  if (b == 0) {
+    return;
+  }
   write_imagef(output, (int2)(X, Y), a / b);
 }
@@ -249,6 +249,10 @@ int ConvolutionOpenCLKernel::GetGlobalLocal(std::vector<size_t> *global, std::ve
   size_t global_c = UP_DIV(UP_DIV(param->output_channel_, C4NUM), work_group_size[2]) * work_group_size[2];

   size_t local_c = GetBiggestDivider(global_c, max_z_size);
+  if (local_c == 0) {
+    MS_LOG(ERROR) << "Divide by zero";
+    return RET_ERROR;
+  }
   size_t local_hw_size = std::min<size_t>(256, max_work_group_size) / local_c;
   size_t local_w = std::min(global_w, local_hw_size);
   size_t local_h = std::min(local_hw_size / local_w, global_h);
@@ -32,6 +32,10 @@ std::vector<size_t> GetCommonGlobalSize(const std::vector<size_t> &local, const

 std::vector<size_t> GetCommonLocalSize(const std::vector<size_t> &global, int max_size) {
   size_t wg_z = GetBiggestDividerWithPriority(global[2], 8);
+  if (wg_z == 0) {
+    MS_LOG(ERROR) << "Divide by zero";
+    return {};
+  }
   size_t wg_xy_size = max_size / wg_z;
   size_t wg_x = std::min(DivideRoundUp(global[0], 2), wg_xy_size);
   size_t wg_y = std::min(wg_xy_size / wg_x, global[1]);
@@ -130,6 +130,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size)
   if (ret != CL_SUCCESS) {
     MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
     UnLock();
+    delete buffer;
     return nullptr;
   }
   device_ptr = static_cast<void *>(buffer);
@@ -138,6 +139,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t>& img_size)
   if (host_ptr == nullptr) {
     MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
     UnLock();
+
     return nullptr;
   }
   cl::Memory *mem = buffer;
@@ -187,6 +189,7 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v
   if (ret != CL_SUCCESS) {
     MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")";
     UnLock();
+    delete buffer;
     return nullptr;
   }
   device_ptr = static_cast<void *>(buffer);
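The two allocator hunks above add `delete buffer;` on the error path so a failed Image2D creation does not leak the wrapper object. A minimal sketch of the pattern (the types are illustrative, not the OpenCL wrapper classes):

```cpp
#include <iostream>

struct Buffer { /* illustrative stand-in for a cl::Buffer wrapper */ };

// Sketch: every early return taken after an allocation must release it,
// otherwise the error path leaks the object.
void *CreateImage(bool creation_fails) {
  auto *buffer = new Buffer();
  if (creation_fails) {
    std::cerr << "Create OpenCL Image2D failed!" << std::endl;
    delete buffer;  // the fix: free before the early return
    return nullptr;
  }
  return static_cast<void *>(buffer);
}
```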
@@ -195,7 +195,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<tensor::Tensor *>
   }

   desc.arch = kernel::KERNEL_ARCH::kCPU;
-  kernel::LiteKernel *kernel;
+  kernel::LiteKernel *kernel = nullptr;
   if (data_type == kNumberTypeFloat32) {
     // check if support fp16
     kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};
@@ -25,6 +25,10 @@ std::shared_ptr<ModelImpl> Import(const char *model_buf, size_t size) {
     return nullptr;
   }
   // todo hangangqiang remove when copy primitive done
+  if (size <= 0) {
+    MS_LOG(ERROR) << "size is zero";
+    return nullptr;
+  }
   auto *inner_buf = new char[size];
   memcpy(inner_buf, model_buf, size);
   auto meta_graph = schema::GetMetaGraph(inner_buf);
@@ -462,7 +462,7 @@ void BenchmarkFlags::InitInputDataList() {
   char *cur_input;
   const char *split_c = ",";
   cur_input = strtok(input_list, split_c);
-  while (cur_input) {
+  while (cur_input != nullptr) {
    input_data_list.emplace_back(cur_input);
     cur_input = strtok(nullptr, split_c);
   }
@@ -117,9 +117,9 @@ STATUS OnnxEltwiseParser::Parse(const onnx::GraphProto &onnx_graph,
   std::unique_ptr<schema::EltwiseT> attr(new schema::EltwiseT());
   if (onnx_node.op_type() == "Prod") {
     attr->mode = schema::EltwiseMode_PROD;
-  } else if (onnx_node.op_type() == "Prod") {
+  } else if (onnx_node.op_type() == "Sum") {
     attr->mode = schema::EltwiseMode_SUM;
   } else if (onnx_node.op_type() == "Maximum") {
     attr->mode = schema::EltwiseMode_MAXIMUM;
   }
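The parser hunk above fixes the kind of duplicate condition cppcheck reports: the second `else if` repeated "Prod", so the "Sum" branch was unreachable. A minimal sketch of the bug class (illustrative names and return codes):

```cpp
#include <string>

// Illustrative: with the duplicated condition, the second branch could
// never run, so a "Sum" op silently got no mode at all.
int ModeFor(const std::string &op) {
  if (op == "Prod") {
    return 0;                // EltwiseMode_PROD
  } else if (op == "Sum") {  // was `op == "Prod"` again: a dead branch
    return 1;                // EltwiseMode_SUM
  } else if (op == "Maximum") {
    return 2;                // EltwiseMode_MAXIMUM
  }
  return -1;                 // unknown op type
}
```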
@@ -166,6 +166,7 @@ const void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kerne
   auto data_size = kernel_num * kernel_size * sizeof(float);
   if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
     MS_LOG(EXCEPTION) << "memset newWeightData failed";
+    delete[] tmp_weight_data;
     return;
   }