forked from mindspore-Ecosystem/mindspore
fix coding specification according to cppcheck report
This commit is contained in:
parent
f41ca6b5c6
commit
8c760a44bc
|
@ -47,7 +47,7 @@ int Power::InferShape(std::vector<lite::tensor::Tensor *> inputs, std::vector<li
|
||||||
}
|
}
|
||||||
auto output_tensor = outputs[0];
|
auto output_tensor = outputs[0];
|
||||||
MS_ASSERT(output_tensor != nullptr);
|
MS_ASSERT(output_tensor != nullptr);
|
||||||
if (exp_tensor) {
|
if (exp_tensor != nullptr) {
|
||||||
if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) {
|
if (exp_tensor->shape() != x_tensor->shape() || exp_tensor->data_type() != x_tensor->data_type()) {
|
||||||
MS_LOG(ERROR) << "Power inputs shape or type is not equal!";
|
MS_LOG(ERROR) << "Power inputs shape or type is not equal!";
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -39,7 +39,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
|
||||||
case kNumberTypeInt8:
|
case kNumberTypeInt8:
|
||||||
case kNumberTypeUInt8: {
|
case kNumberTypeUInt8: {
|
||||||
kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||||
if (!kernel) {
|
if (kernel == nullptr) {
|
||||||
MS_LOG(ERROR) << "kernel is nullptr.";
|
MS_LOG(ERROR) << "kernel is nullptr.";
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,6 +65,13 @@ void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_wei
|
||||||
int kernel_plane_stride = channel_in;
|
int kernel_plane_stride = channel_in;
|
||||||
if (oc_block == 0) {
|
if (oc_block == 0) {
|
||||||
MS_LOG(ERROR) << "Divide by zero";
|
MS_LOG(ERROR) << "Divide by zero";
|
||||||
|
free(tmp_weight_data);
|
||||||
|
free(tmp_data);
|
||||||
|
free(trans_out_data);
|
||||||
|
free(matrix_g_data_fp16);
|
||||||
|
free(matrix_gt_data_fp16);
|
||||||
|
delete matrix_g;
|
||||||
|
delete matrix_gt;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < channel_out; i++) {
|
for (int i = 0; i < channel_out; i++) {
|
||||||
|
|
|
@ -54,6 +54,11 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int
|
||||||
int kernel_plane_stride = channel_in;
|
int kernel_plane_stride = channel_in;
|
||||||
if (oc_block == 0) {
|
if (oc_block == 0) {
|
||||||
MS_LOG(ERROR) << "Divide by zero";
|
MS_LOG(ERROR) << "Divide by zero";
|
||||||
|
free(tmp_weight_data);
|
||||||
|
free(tmp_data);
|
||||||
|
free(trans_out_data);
|
||||||
|
delete matrix_g;
|
||||||
|
delete matrix_gt;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < channel_out; i++) {
|
for (int i = 0; i < channel_out; i++) {
|
||||||
|
|
|
@ -161,6 +161,7 @@ int SqueezeInt8CPUKernel::Run() {
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: ";
|
MS_LOG(ERROR) << "RunSqueezeParam failed. errorcode: ";
|
||||||
}
|
}
|
||||||
|
free(inputs_array);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -219,7 +219,7 @@ void IndirectGemmFp32_Comm(float *output, const float *input, const float *weigh
|
||||||
int d4mod = deep % 4;
|
int d4mod = deep % 4;
|
||||||
int d4div = deep / 4;
|
int d4div = deep / 4;
|
||||||
int a_index = d4div * 4 * 8 + r * 4 + d4mod;
|
int a_index = d4div * 4 * 8 + r * 4 + d4mod;
|
||||||
int b_index = 8 * deep + c;
|
const int b_index = 8 * deep + c;
|
||||||
value += input[a_index] * weight[b_index];
|
value += input[a_index] * weight[b_index];
|
||||||
}
|
}
|
||||||
output[r * offset + c] = value;
|
output[r * offset + c] = value;
|
||||||
|
|
|
@ -334,7 +334,7 @@ void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_
|
||||||
bool relu6 = conv_param->is_relu6_;
|
bool relu6 = conv_param->is_relu6_;
|
||||||
// todo
|
// todo
|
||||||
int thread_count = conv_param->thread_num_;
|
int thread_count = conv_param->thread_num_;
|
||||||
int tile_n = 16;
|
const int tile_n = 16;
|
||||||
int output_count = out_h * out_w;
|
int output_count = out_h * out_w;
|
||||||
int output_tile_count = UP_DIV(output_count, tile_n);
|
int output_tile_count = UP_DIV(output_count, tile_n);
|
||||||
|
|
||||||
|
@ -379,7 +379,7 @@ void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16
|
||||||
float16_t *tile_buffer, float16_t *block_unit_buffer, float16_t *tmp_dst_buffer, float16_t *tmp_out,
|
float16_t *tile_buffer, float16_t *block_unit_buffer, float16_t *tmp_dst_buffer, float16_t *tmp_out,
|
||||||
int task_id, ConvParameter *conv_param) {
|
int task_id, ConvParameter *conv_param) {
|
||||||
int thread_count = conv_param->thread_num_;
|
int thread_count = conv_param->thread_num_;
|
||||||
int tile_num = 16;
|
const int tile_num = 16;
|
||||||
const int output_unit = 4;
|
const int output_unit = 4;
|
||||||
const int k_plane = 36;
|
const int k_plane = 36;
|
||||||
int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
|
int ic4 = UP_DIV(conv_param->input_channel_, C4NUM);
|
||||||
|
@ -427,7 +427,7 @@ void UnPack3x3OutputFp16(const float16_t *src, float16_t *dst, int batch, int he
|
||||||
float16_t *batch_out = dst + ro_batch_size;
|
float16_t *batch_out = dst + ro_batch_size;
|
||||||
for (int h = 0; h < height; h++) {
|
for (int h = 0; h < height; h++) {
|
||||||
int src_h_offset = h * out_w_block * C4NUM * C8NUM;
|
int src_h_offset = h * out_w_block * C4NUM * C8NUM;
|
||||||
int dst_h_offset = h * width * channel;
|
const int dst_h_offset = h * width * channel;
|
||||||
for (int w = 0; w < width; w++) {
|
for (int w = 0; w < width; w++) {
|
||||||
int src_w_offset = src_h_offset + w * C8NUM;
|
int src_w_offset = src_h_offset + w * C8NUM;
|
||||||
int dst_w_offset = dst_h_offset + w * channel;
|
int dst_w_offset = dst_h_offset + w * channel;
|
||||||
|
@ -462,7 +462,7 @@ void UnPack3x3ReluOutputFp16(const float16_t *src, float16_t *dst, int batch, in
|
||||||
float16_t *batch_out = dst + ro_batch_size;
|
float16_t *batch_out = dst + ro_batch_size;
|
||||||
for (int h = 0; h < height; h++) {
|
for (int h = 0; h < height; h++) {
|
||||||
int src_h_offset = h * out_w_block * C4NUM * C8NUM;
|
int src_h_offset = h * out_w_block * C4NUM * C8NUM;
|
||||||
int dst_h_offset = h * width * channel;
|
const int dst_h_offset = h * width * channel;
|
||||||
for (int w = 0; w < width; w++) {
|
for (int w = 0; w < width; w++) {
|
||||||
int src_w_offset = src_h_offset + w * C8NUM;
|
int src_w_offset = src_h_offset + w * C8NUM;
|
||||||
int dst_w_offset = dst_h_offset + w * channel;
|
int dst_w_offset = dst_h_offset + w * channel;
|
||||||
|
@ -502,7 +502,7 @@ void UnPack3x3Relu6OutputFp16(const float16_t *src, float16_t *dst, int batch, i
|
||||||
float16_t *batch_out = dst + ro_batch_size;
|
float16_t *batch_out = dst + ro_batch_size;
|
||||||
for (int h = 0; h < height; h++) {
|
for (int h = 0; h < height; h++) {
|
||||||
int src_h_offset = h * out_w_block * C4NUM * C8NUM;
|
int src_h_offset = h * out_w_block * C4NUM * C8NUM;
|
||||||
int dst_h_offset = h * width * channel;
|
const int dst_h_offset = h * width * channel;
|
||||||
for (int w = 0; w < width; w++) {
|
for (int w = 0; w < width; w++) {
|
||||||
int src_w_offset = src_h_offset + w * C8NUM;
|
int src_w_offset = src_h_offset + w * C8NUM;
|
||||||
int dst_w_offset = dst_h_offset + w * channel;
|
int dst_w_offset = dst_h_offset + w * channel;
|
||||||
|
@ -545,7 +545,7 @@ void ConvWinogardFp16(float16_t *input_data, float16_t *trans_weight, const floa
|
||||||
int out_unit = conv_param->output_unit_;
|
int out_unit = conv_param->output_unit_;
|
||||||
int out_w_block = UP_DIV(conv_param->output_w_, out_unit);
|
int out_w_block = UP_DIV(conv_param->output_w_, out_unit);
|
||||||
int out_h_block = UP_DIV(conv_param->output_h_, out_unit);
|
int out_h_block = UP_DIV(conv_param->output_h_, out_unit);
|
||||||
int tile_num = 16;
|
const int tile_num = 16;
|
||||||
int output_count = out_w_block * out_h_block;
|
int output_count = out_w_block * out_h_block;
|
||||||
int output_tile_count = UP_DIV(output_count, tile_num);
|
int output_tile_count = UP_DIV(output_count, tile_num);
|
||||||
int out_channel = conv_param->output_channel_;
|
int out_channel = conv_param->output_channel_;
|
||||||
|
@ -594,7 +594,7 @@ void UnPackWinogradOutputFp16(const float16_t *src, float16_t *dst, int batch, i
|
||||||
int dst_batch_offset = b * height * width * channel;
|
int dst_batch_offset = b * height * width * channel;
|
||||||
for (int h = 0; h < height; h++) {
|
for (int h = 0; h < height; h++) {
|
||||||
int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit);
|
int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit);
|
||||||
int dst_h_offset = dst_batch_offset + h * width * channel;
|
const int dst_h_offset = dst_batch_offset + h * width * channel;
|
||||||
for (int w = 0; w < width; w++) {
|
for (int w = 0; w < width; w++) {
|
||||||
int src_w_offset = src_h_offset + w * C8NUM;
|
int src_w_offset = src_h_offset + w * C8NUM;
|
||||||
int dst_w_offset = dst_h_offset + w * channel;
|
int dst_w_offset = dst_h_offset + w * channel;
|
||||||
|
@ -633,7 +633,7 @@ void UnPackWinogradReluOutputFp16(const float16_t *src, float16_t *dst, int batc
|
||||||
int dst_batch_offset = b * height * width * channel;
|
int dst_batch_offset = b * height * width * channel;
|
||||||
for (int h = 0; h < height; h++) {
|
for (int h = 0; h < height; h++) {
|
||||||
int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit);
|
int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit);
|
||||||
int dst_h_offset = dst_batch_offset + h * width * channel;
|
const int dst_h_offset = dst_batch_offset + h * width * channel;
|
||||||
for (int w = 0; w < width; w++) {
|
for (int w = 0; w < width; w++) {
|
||||||
int src_w_offset = src_h_offset + w * C8NUM;
|
int src_w_offset = src_h_offset + w * C8NUM;
|
||||||
int dst_w_offset = dst_h_offset + w * channel;
|
int dst_w_offset = dst_h_offset + w * channel;
|
||||||
|
@ -679,7 +679,7 @@ void UnPackWinogradRelu6OutputFp16(const float16_t *src, float16_t *dst, int bat
|
||||||
int dst_batch_offset = b * height * width * channel;
|
int dst_batch_offset = b * height * width * channel;
|
||||||
for (int h = 0; h < height; h++) {
|
for (int h = 0; h < height; h++) {
|
||||||
int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit);
|
int src_h_offset = src_batch_offset + C8NUM * (h * out_w_block_num * output_unit);
|
||||||
int dst_h_offset = dst_batch_offset + h * width * channel;
|
const int dst_h_offset = dst_batch_offset + h * width * channel;
|
||||||
for (int w = 0; w < width; w++) {
|
for (int w = 0; w < width; w++) {
|
||||||
int src_w_offset = src_h_offset + w * C8NUM;
|
int src_w_offset = src_h_offset + w * C8NUM;
|
||||||
int dst_w_offset = dst_h_offset + w * channel;
|
int dst_w_offset = dst_h_offset + w * channel;
|
||||||
|
|
|
@ -18,6 +18,9 @@
|
||||||
void PostConvFuncCommFp16(float16_t *out_ptr, const float16_t *src_ptr_, const float16_t *bias_ptr,
|
void PostConvFuncCommFp16(float16_t *out_ptr, const float16_t *src_ptr_, const float16_t *bias_ptr,
|
||||||
size_t output_channel, size_t plane_size, size_t stride, bool is_relu, bool is_relu6,
|
size_t output_channel, size_t plane_size, size_t stride, bool is_relu, bool is_relu6,
|
||||||
int size) {
|
int size) {
|
||||||
|
if (size == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
for (int oc = 0; oc < output_channel; oc++) {
|
for (int oc = 0; oc < output_channel; oc++) {
|
||||||
int oc_div = oc / size, oc_mod = oc % size;
|
int oc_div = oc / size, oc_mod = oc % size;
|
||||||
for (int hw = 0; hw < plane_size; hw++) {
|
for (int hw = 0; hw < plane_size; hw++) {
|
||||||
|
|
|
@ -93,8 +93,8 @@ void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float1
|
||||||
|
|
||||||
void PackWeightFp16(float16_t *weight_data, ConvParameter *conv_param, float16_t *packed_weight) {
|
void PackWeightFp16(float16_t *weight_data, ConvParameter *conv_param, float16_t *packed_weight) {
|
||||||
// original weight format : ohwi
|
// original weight format : ohwi
|
||||||
int tile_num = 8;
|
const int tile_num = 8;
|
||||||
int inchannel_block = 4;
|
const int inchannel_block = 4;
|
||||||
int kernel_h = conv_param->kernel_h_;
|
int kernel_h = conv_param->kernel_h_;
|
||||||
int kernel_w = conv_param->kernel_w_;
|
int kernel_w = conv_param->kernel_w_;
|
||||||
int in_channel = conv_param->input_channel_;
|
int in_channel = conv_param->input_channel_;
|
||||||
|
|
|
@ -539,7 +539,7 @@ void Conv3x3Fp16OutputTransform(const float16_t *gemm_out, float16_t *out_data,
|
||||||
void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data, int cal_num,
|
void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_input, float16_t *tmp_data, int cal_num,
|
||||||
int out_tile_index, int out_w_block_num, ConvParameter *conv_param,
|
int out_tile_index, int out_w_block_num, ConvParameter *conv_param,
|
||||||
InputTransformUnitFp16Func input_trans_func) {
|
InputTransformUnitFp16Func input_trans_func) {
|
||||||
int tile_num = 16;
|
const int tile_num = 16;
|
||||||
int input_unit = conv_param->input_unit_;
|
int input_unit = conv_param->input_unit_;
|
||||||
int output_unit = conv_param->output_unit_;
|
int output_unit = conv_param->output_unit_;
|
||||||
int in_channel = conv_param->input_channel_;
|
int in_channel = conv_param->input_channel_;
|
||||||
|
|
|
@ -160,7 +160,7 @@ void InputTransform4x4UnitFp16(const float16_t *src_data, float16_t *dst_data, i
|
||||||
float16_t m23 = t23 - 0.25f * t21;
|
float16_t m23 = t23 - 0.25f * t21;
|
||||||
|
|
||||||
float16_t m30 = t30 - 4 * t32;
|
float16_t m30 = t30 - 4 * t32;
|
||||||
float16_t m31 = t31 + 2 * t32;
|
const float16_t m31 = t31 + 2 * t32;
|
||||||
float16_t m32 = 2 * t32 - t31;
|
float16_t m32 = 2 * t32 - t31;
|
||||||
float16_t m33 = t33 - 0.25f * t31;
|
float16_t m33 = t33 - 0.25f * t31;
|
||||||
|
|
||||||
|
|
|
@ -437,7 +437,7 @@ void Conv3x3Fp32(float *input_data, float *transed_weight, const float *bias_dat
|
||||||
int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT);
|
int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT);
|
||||||
int output_count = out_w_block * out_h_block;
|
int output_count = out_w_block * out_h_block;
|
||||||
int output_tile_count = UP_DIV(output_count, TILE_NUM);
|
int output_tile_count = UP_DIV(output_count, TILE_NUM);
|
||||||
int input_unit_square = 4 * 4;
|
const int input_unit_square = 4 * 4;
|
||||||
float *tile_buffer = buffer_list[0];
|
float *tile_buffer = buffer_list[0];
|
||||||
float *block_unit_buffer = buffer_list[1];
|
float *block_unit_buffer = buffer_list[1];
|
||||||
float *tmp_dst_buffer = buffer_list[2];
|
float *tmp_dst_buffer = buffer_list[2];
|
||||||
|
|
|
@ -91,7 +91,7 @@ int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_
|
||||||
void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
||||||
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
||||||
bool out_value = param->out_value_;
|
bool out_value = param->out_value_;
|
||||||
float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||||
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
||||||
int32_t output_zp = out_quant_arg->zp_;
|
int32_t output_zp = out_quant_arg->zp_;
|
||||||
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
|
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
|
||||||
|
@ -117,7 +117,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
|
||||||
void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
||||||
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
||||||
bool out_value = param->out_value_;
|
bool out_value = param->out_value_;
|
||||||
float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||||
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
||||||
int32_t output_zp = out_quant_arg->zp_;
|
int32_t output_zp = out_quant_arg->zp_;
|
||||||
int in_shape1 = in_shape[1];
|
int in_shape1 = in_shape[1];
|
||||||
|
@ -148,7 +148,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
|
||||||
void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
||||||
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
||||||
bool out_value = param->out_value_;
|
bool out_value = param->out_value_;
|
||||||
float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||||
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
||||||
int32_t output_zp = out_quant_arg->zp_;
|
int32_t output_zp = out_quant_arg->zp_;
|
||||||
int in_shape1 = in_shape[1];
|
int in_shape1 = in_shape[1];
|
||||||
|
@ -183,7 +183,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
|
||||||
void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
|
||||||
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
||||||
bool out_value = param->out_value_;
|
bool out_value = param->out_value_;
|
||||||
float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||||
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
|
||||||
int32_t output_zp = out_quant_arg->zp_;
|
int32_t output_zp = out_quant_arg->zp_;
|
||||||
int in_shape1 = in_shape[1];
|
int in_shape1 = in_shape[1];
|
||||||
|
|
|
@ -26,7 +26,7 @@ int ElementNotEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int elem
|
||||||
ArithmeticQuantArg *quant_arg) {
|
ArithmeticQuantArg *quant_arg) {
|
||||||
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
||||||
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
||||||
float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
||||||
float out_zp = quant_arg->out_args_.zp_;
|
float out_zp = quant_arg->out_args_.zp_;
|
||||||
|
|
||||||
for (int index = 0; index < element_size; ++index) {
|
for (int index = 0; index < element_size; ++index) {
|
||||||
|
@ -45,7 +45,7 @@ int ElementNotEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int elem
|
||||||
int ElementEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) {
|
int ElementEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) {
|
||||||
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
||||||
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
||||||
float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
||||||
float out_zp = quant_arg->out_args_.zp_;
|
float out_zp = quant_arg->out_args_.zp_;
|
||||||
for (int index = 0; index < element_size; ++index) {
|
for (int index = 0; index < element_size; ++index) {
|
||||||
float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias;
|
float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias;
|
||||||
|
@ -63,7 +63,7 @@ int ElementEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int element
|
||||||
int ElementLessInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) {
|
int ElementLessInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size, ArithmeticQuantArg *quant_arg) {
|
||||||
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
||||||
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
||||||
float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
||||||
float out_zp = quant_arg->out_args_.zp_;
|
float out_zp = quant_arg->out_args_.zp_;
|
||||||
for (int index = 0; index < element_size; ++index) {
|
for (int index = 0; index < element_size; ++index) {
|
||||||
float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias;
|
float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias;
|
||||||
|
@ -78,7 +78,7 @@ int ElementLessEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int ele
|
||||||
ArithmeticQuantArg *quant_arg) {
|
ArithmeticQuantArg *quant_arg) {
|
||||||
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
||||||
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
||||||
float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
||||||
float out_zp = quant_arg->out_args_.zp_;
|
float out_zp = quant_arg->out_args_.zp_;
|
||||||
|
|
||||||
for (int index = 0; index < element_size; ++index) {
|
for (int index = 0; index < element_size; ++index) {
|
||||||
|
@ -94,7 +94,7 @@ int ElementGreaterInt8(int8_t *input0, int8_t *input1, int8_t *output, int eleme
|
||||||
ArithmeticQuantArg *quant_arg) {
|
ArithmeticQuantArg *quant_arg) {
|
||||||
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
||||||
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
||||||
float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
||||||
float out_zp = quant_arg->out_args_.zp_;
|
float out_zp = quant_arg->out_args_.zp_;
|
||||||
|
|
||||||
for (int index = 0; index < element_size; ++index) {
|
for (int index = 0; index < element_size; ++index) {
|
||||||
|
@ -110,7 +110,7 @@ int ElementGreaterEqualInt8(int8_t *input0, int8_t *input1, int8_t *output, int
|
||||||
ArithmeticQuantArg *quant_arg) {
|
ArithmeticQuantArg *quant_arg) {
|
||||||
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
float in0_bias = -quant_arg->in0_args_.zp_ * quant_arg->in0_args_.scale_;
|
||||||
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
float in1_bias = -quant_arg->in1_args_.zp_ * quant_arg->in1_args_.scale_;
|
||||||
float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
const float output_inverse_scale = 1.f / quant_arg->out_args_.scale_;
|
||||||
float out_zp = quant_arg->out_args_.zp_;
|
float out_zp = quant_arg->out_args_.zp_;
|
||||||
for (int index = 0; index < element_size; ++index) {
|
for (int index = 0; index < element_size; ++index) {
|
||||||
float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias;
|
float in0_real = input0[index] * quant_arg->in0_args_.scale_ + in0_bias;
|
||||||
|
|
|
@ -365,7 +365,7 @@ void Conv3x3Int8(int16_t *input_data, int16_t *transed_weight, const int32_t *bi
|
||||||
int output_tile_count = UP_DIV(output_count, TILE_NUM);
|
int output_tile_count = UP_DIV(output_count, TILE_NUM);
|
||||||
int oc4 = UP_DIV(output_channel, C4NUM);
|
int oc4 = UP_DIV(output_channel, C4NUM);
|
||||||
int tile_buffer_offset = TILE_NUM * 16 * ic8 * C8NUM;
|
int tile_buffer_offset = TILE_NUM * 16 * ic8 * C8NUM;
|
||||||
int block_unit_buffer_offset = 16 * C8NUM;
|
const int block_unit_buffer_offset = 16 * C8NUM;
|
||||||
int tmp_dst_buffer_offset = TILE_NUM * 16 * oc4 * C4NUM;
|
int tmp_dst_buffer_offset = TILE_NUM * 16 * oc4 * C4NUM;
|
||||||
|
|
||||||
int input_batch = conv_param->input_batch_;
|
int input_batch = conv_param->input_batch_;
|
||||||
|
|
|
@ -253,7 +253,7 @@ int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis
|
||||||
return NNACL_NULL_PTR;
|
return NNACL_NULL_PTR;
|
||||||
}
|
}
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
int base_offset = 20;
|
const int base_offset = 20;
|
||||||
for (j = tid; j < outer_size; j += thread_num) {
|
for (j = tid; j < outer_size; j += thread_num) {
|
||||||
const int32_t *outer_src = src_data + j * axis_size * inner_size;
|
const int32_t *outer_src = src_data + j * axis_size * inner_size;
|
||||||
int8_t *outer_dst = dst_data + j * inner_size;
|
int8_t *outer_dst = dst_data + j * inner_size;
|
||||||
|
|
|
@ -41,7 +41,7 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int
|
||||||
for (n = 0; n < in_n; n++) {
|
for (n = 0; n < in_n; n++) {
|
||||||
for (h = tid; h < new_height; h += thread_num) {
|
for (h = tid; h < new_height; h += thread_num) {
|
||||||
// float actual_y = (float)h * height_scale;
|
// float actual_y = (float)h * height_scale;
|
||||||
int base_offset = 20;
|
const int base_offset = 20;
|
||||||
int scaled_actual_y;
|
int scaled_actual_y;
|
||||||
int bottom, top;
|
int bottom, top;
|
||||||
int scaled_bottom_weight, scaled_top_weight;
|
int scaled_bottom_weight, scaled_top_weight;
|
||||||
|
@ -149,7 +149,7 @@ void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32
|
||||||
int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape,
|
int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape,
|
||||||
const int *output_shape, const bool align_corners, const QuantMulArg *multiplier,
|
const int *output_shape, const bool align_corners, const QuantMulArg *multiplier,
|
||||||
QuantArg *quant_in, QuantArg *quant_out, int tid, int thread_num) {
|
QuantArg *quant_in, QuantArg *quant_out, int tid, int thread_num) {
|
||||||
int base_offset = 20;
|
const int base_offset = 20;
|
||||||
int32_t batch, y, x, c;
|
int32_t batch, y, x, c;
|
||||||
int32_t in_h, in_w, new_height, new_width;
|
int32_t in_h, in_w, new_height, new_width;
|
||||||
in_h = input_shape[1];
|
in_h = input_shape[1];
|
||||||
|
|
|
@ -55,7 +55,7 @@ int MultiplyByQuantizedMultiplier(int32_t value, int32_t multiplier, int32_t lef
|
||||||
}
|
}
|
||||||
|
|
||||||
int FractionsBits(int kIntegerBits) {
|
int FractionsBits(int kIntegerBits) {
|
||||||
int totalBits = 8 * sizeof(int32_t) - 1;
|
const int totalBits = 8 * sizeof(int32_t) - 1;
|
||||||
return totalBits - kIntegerBits;
|
return totalBits - kIntegerBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,7 +82,7 @@ int32_t BitNot(int32_t a) { return ~(uint32_t)a; }
|
||||||
int SelectUsingMask(int mask, int bound, int val) { return BitXor(BitAnd(mask, bound), BitAnd(BitNot(mask), val)); }
|
int SelectUsingMask(int mask, int bound, int val) { return BitXor(BitAnd(mask, bound), BitAnd(BitNot(mask), val)); }
|
||||||
|
|
||||||
int32_t MaskNonZero(int32_t a) {
|
int32_t MaskNonZero(int32_t a) {
|
||||||
int32_t zreo = 0;
|
const int32_t zreo = 0;
|
||||||
return a ? BitNot(zreo) : zreo;
|
return a ? BitNot(zreo) : zreo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -284,7 +284,7 @@ void Conv3x3Fp32InputTransform(const float *input_data, float *trans_input, floa
|
||||||
int pad_w = conv_param->pad_w_;
|
int pad_w = conv_param->pad_w_;
|
||||||
int pad_h = conv_param->pad_h_;
|
int pad_h = conv_param->pad_h_;
|
||||||
int ic4 = UP_DIV(input_channel, C4NUM);
|
int ic4 = UP_DIV(input_channel, C4NUM);
|
||||||
int input_unit = 4;
|
const int input_unit = 4;
|
||||||
if (out_w_block == 0) {
|
if (out_w_block == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -162,7 +162,7 @@ void InputTransform4x4Unit(const float *src_data, float *dst_data, int src_step,
|
||||||
|
|
||||||
float m30 = t30 - 4 * t32;
|
float m30 = t30 - 4 * t32;
|
||||||
float m31 = t31 + 2 * t32;
|
float m31 = t31 + 2 * t32;
|
||||||
float m32 = 2 * t32 - t31;
|
const float m32 = 2 * t32 - t31;
|
||||||
float m33 = t33 - 0.25f * t31;
|
float m33 = t33 - 0.25f * t31;
|
||||||
|
|
||||||
(dst_data + i)[0] = m00;
|
(dst_data + i)[0] = m00;
|
||||||
|
|
|
@ -49,6 +49,9 @@ __kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t in
|
||||||
|
|
||||||
float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
|
float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
|
||||||
float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
|
float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
|
||||||
|
if (b == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
write_imagef(output, (int2)(X, Y), a / b);
|
write_imagef(output, (int2)(X, Y), a / b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -510,6 +510,7 @@ STATUS AwareQuantizer::QuantConvBias(const mindspore::schema::MetaGraphT *graph,
|
||||||
auto ret = memcpy_s(biasTensor->data.data(), bShapeSize * sizeof(int32_t), qDatas, bShapeSize * sizeof(int32_t));
|
auto ret = memcpy_s(biasTensor->data.data(), bShapeSize * sizeof(int32_t), qDatas, bShapeSize * sizeof(int32_t));
|
||||||
if (ret != EOK) {
|
if (ret != EOK) {
|
||||||
// MS_LOGE("memcpy_s failed: %d", ret);
|
// MS_LOGE("memcpy_s failed: %d", ret);
|
||||||
|
delete[] qDatas;
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
delete[] qDatas;
|
delete[] qDatas;
|
||||||
|
|
Loading…
Reference in New Issue