Fix signed/unsigned mixing issues reported by PC-lint and internal self-checks

This commit is contained in:
zhaozhenlong 2021-08-06 09:53:59 +08:00
parent b5da59f2ac
commit 931c875430
61 changed files with 255 additions and 240 deletions

View File

@ -5,7 +5,8 @@
//void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, //void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
// const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, // const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
// int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, int peroc); // const int *multiplier, const int *left_shift, const int *right_shift, int row,
// int col, int stride, int peroc);
// x0: a(left matrix ptr) // x0: a(left matrix ptr)
// x1: b(right matrix ptr) // x1: b(right matrix ptr)

View File

@ -4,8 +4,9 @@
.align 5 .align 5
//void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums, //void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums,
// const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift, // const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier,
// int32_t *right_shift, size_t stride, size_t filter_peroc, int32_t *filter_zp) // const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc,
// const int32_t *filter_zp)
// x0: a(left matrix ptr) // x0: a(left matrix ptr)
// x1: b(right matrix ptr) // x1: b(right matrix ptr)

View File

@ -39,8 +39,8 @@ void DoArgMinMaxQuant(const int8_t *input, int8_t *output, const ArgMinMaxParame
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_; int32_t output_zp = out_quant_arg->zp_;
for (int i = 0; i < pre_axis_count; ++i) { for (int i = 0; i < pre_axis_count; ++i) {
size_t output_offset = i * after_axis_count; int output_offset = i * after_axis_count;
size_t input_offset = output_offset * axis_count; int input_offset = output_offset * axis_count;
for (int j = 0; j < after_axis_count; ++j) { for (int j = 0; j < after_axis_count; ++j) {
float value = -FLT_MAX; float value = -FLT_MAX;
if (!param->get_max_) { if (!param->get_max_) {
@ -97,8 +97,8 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
int32_t output_zp = out_quant_arg->zp_; int32_t output_zp = out_quant_arg->zp_;
for (int32_t i = 0; i < param->in_strides_[0]; ++i) { for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
for (int j = 0; j < in_shape[0]; ++j) { for (int j = 0; j < in_shape[0]; ++j) {
size_t offset = param->in_strides_[0] * j + i; int offset = param->in_strides_[0] * j + i;
param->arg_elements_[j].index_ = j; param->arg_elements_[j].index_ = (uint32_t)j;
param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
} }
if (param->get_max_) { if (param->get_max_) {
@ -108,7 +108,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
} }
for (int j = 0; j < param->topk_; ++j) { for (int j = 0; j < param->topk_; ++j) {
size_t out_offset = j * param->out_strides_[0] + i; int out_offset = j * param->out_strides_[0] + i;
float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_; float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
} }
@ -123,12 +123,12 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
int32_t output_zp = out_quant_arg->zp_; int32_t output_zp = out_quant_arg->zp_;
int in_shape1 = in_shape[1]; int in_shape1 = in_shape[1];
for (int i = 0; i < in_shape[0]; ++i) { for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0]; int in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0]; int out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < param->in_strides_[1]; ++j) { for (int j = 0; j < param->in_strides_[1]; ++j) {
for (int k = 0; k < in_shape1; ++k) { for (int k = 0; k < in_shape1; ++k) {
size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; int offset = param->in_strides_[1] * k + in_dim0_offset + j;
param->arg_elements_[k].index_ = k; param->arg_elements_[k].index_ = (size_t)k;
param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
} }
if (param->get_max_) { if (param->get_max_) {
@ -138,7 +138,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
} }
for (int k = 0; k < param->topk_; ++k) { for (int k = 0; k < param->topk_; ++k) {
size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; int out_offset = out_dim0_offset + j + k * param->out_strides_[1];
float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_; float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
} }
@ -155,15 +155,15 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
int in_shape1 = in_shape[1]; int in_shape1 = in_shape[1];
int in_shape2 = in_shape[2]; int in_shape2 = in_shape[2];
for (int i = 0; i < in_shape[0]; ++i) { for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0]; int in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0]; int out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < in_shape1; ++j) { for (int j = 0; j < in_shape1; ++j) {
size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
for (int k = 0; k < param->in_strides_[2]; ++k) { for (int k = 0; k < param->in_strides_[2]; ++k) {
for (int l = 0; l < in_shape2; ++l) { for (int l = 0; l < in_shape2; ++l) {
size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; int offset = param->in_strides_[2] * l + k + in_dim1_offset;
param->arg_elements_[l].index_ = l; param->arg_elements_[l].index_ = (uint32_t)l;
param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
} }
if (param->get_max_) { if (param->get_max_) {
@ -172,7 +172,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8);
} }
for (int l = 0; l < param->topk_; ++l) { for (int l = 0; l < param->topk_; ++l) {
size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; int out_offset = out_dim1_offset + k + l * param->out_strides_[2];
float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
} }
@ -191,17 +191,17 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape,
int in_shape2 = in_shape[2]; int in_shape2 = in_shape[2];
int in_shape3 = in_shape[3]; int in_shape3 = in_shape[3];
for (int i = 0; i < in_shape[0]; ++i) { for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0]; int in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0]; int out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < in_shape1; ++j) { for (int j = 0; j < in_shape1; ++j) {
size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
for (int k = 0; k < in_shape2; ++k) { for (int k = 0; k < in_shape2; ++k) {
size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
for (int l = 0; l < in_shape3; ++l) { for (int l = 0; l < in_shape3; ++l) {
size_t offset = l + in_dim2_offset; int offset = l + in_dim2_offset;
param->arg_elements_[l].index_ = l; param->arg_elements_[l].index_ = (uint32_t)l;
param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
} }
if (param->get_max_) { if (param->get_max_) {
@ -210,7 +210,7 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape,
qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8);
} }
for (int l = 0; l < param->topk_; ++l) { for (int l = 0; l < param->topk_; ++l) {
size_t out_offset = out_dim2_offset + l; int out_offset = out_dim2_offset + l;
float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
} }

View File

@ -218,7 +218,7 @@ int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec,
void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para, void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para,
int *index) { int *index) {
int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)para.shift_left_);
for (; (*index) <= element_size - 8; (*index) += 8) { for (; (*index) <= element_size - 8; (*index) += 8) {
int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_); int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_);

View File

@ -812,11 +812,11 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input,
for (int j = real_y_start; j < real_y_end; j++) { for (int j = real_y_start; j < real_y_end; j++) {
const int16_t *src = input_data + src_c8_offset + C8NUM * (j * input_width + real_x_start); const int16_t *src = input_data + src_c8_offset + C8NUM * (j * input_width + real_x_start);
int16_t *dst = tmp_data + C8NUM * (C4NUM * j + real_x_start); int16_t *dst = tmp_data + C8NUM * (C4NUM * j + real_x_start);
memcpy(dst, src, (real_x_end - real_x_start) * C8NUM * sizeof(int16_t)); memcpy(dst, src, (size_t)(real_x_end - real_x_start) * C8NUM * sizeof(int16_t));
} }
// input transform // input transform
int dst_ic8_offset = dst_plane_offset + ic * TILE_NUM * C8NUM; int dst_ic8_offset = dst_plane_offset + ic * TILE_NUM * C8NUM;
size_t dst_step = ic8 * C8NUM * TILE_NUM; size_t dst_step = (size_t)ic8 * C8NUM * TILE_NUM;
int16_t *trans_input_ptr = trans_input + dst_ic8_offset; int16_t *trans_input_ptr = trans_input + dst_ic8_offset;
Conv3x3Int8InputUnit(tmp_data, trans_input_ptr, dst_step, input_zp); Conv3x3Int8InputUnit(tmp_data, trans_input_ptr, dst_step, input_zp);
} }
@ -826,7 +826,7 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input,
void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) { void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) {
int oc4 = UP_DIV(oc, C4NUM); int oc4 = UP_DIV(oc, C4NUM);
#ifdef ENABLE_ARM #ifdef ENABLE_ARM
IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t)); IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, (size_t)oc4 * 4 * 16 * sizeof(int32_t));
#else #else
const int input_unit_square = 16; const int input_unit_square = 16;
for (int c = 0; c < oc4; c++) { for (int c = 0; c < oc4; c++) {

View File

@ -20,9 +20,9 @@
int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
const ConvParameter *conv_param) { const ConvParameter *conv_param) {
/* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */ /* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */
size_t input_plane = conv_param->input_w_ * conv_param->input_h_; int input_plane = conv_param->input_w_ * conv_param->input_h_;
size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
size_t output_plane = conv_param->output_w_ * conv_param->output_h_; int output_plane = conv_param->output_w_ * conv_param->output_h_;
int oc4 = UP_DIV(output_channel, C4NUM); int oc4 = UP_DIV(output_channel, C4NUM);
int in_plane4 = UP_ROUND(input_plane, C4NUM); int in_plane4 = UP_ROUND(input_plane, C4NUM);
@ -38,7 +38,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
for (int c = 0; c < oc4; c++) { for (int c = 0; c < oc4; c++) {
int32_t *dst_ptr = tmp + c * output_plane * C4NUM; int32_t *dst_ptr = tmp + c * output_plane * C4NUM;
const int32_t *src_ptr = src + c * in_plane4 * kernel_plane * C4NUM; const int32_t *src_ptr = src + c * in_plane4 * kernel_plane * C4NUM;
memset(dst_ptr, 0, output_plane * C4NUM * sizeof(int32_t)); memset(dst_ptr, 0, (size_t)output_plane * C4NUM * sizeof(int32_t));
for (int ih = 0; ih < conv_param->input_h_; ih++) { for (int ih = 0; ih < conv_param->input_h_; ih++) {
for (int iw = 0; iw < conv_param->input_w_; iw++) { for (int iw = 0; iw < conv_param->input_w_; iw++) {
@ -81,7 +81,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
} /*ih*/ } /*ih*/
} /*oc*/ } /*oc*/
PostFuncInt8C4(tmp, bias, out, output_channel, output_plane, conv_param->output_channel_, PostFuncInt8C4(tmp, bias, out, output_channel, (size_t)output_plane, conv_param->output_channel_,
conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0], conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0],
conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.output_quant_args_[0].zp_, conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]); conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]);

View File

@ -39,7 +39,7 @@ int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg)
if (arg->relu6_multiplier_exponent < 0) { if (arg->relu6_multiplier_exponent < 0) {
relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent); relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent);
} }
relu6_value = (relu6_value + (1 << 15)) >> 1; relu6_value = (size_t)(relu6_value + (1 << 15)) >> 1;
const int16_t preshift_output_value = const int16_t preshift_output_value =
SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale); SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale);

View File

@ -104,7 +104,7 @@ void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
for (int ri = 0; ri < row_4div; ri += C4NUM) { for (int ri = 0; ri < row_4div; ri += C4NUM) {
for (int ci = 0; ci < col_16div; ci += C16NUM) { for (int ci = 0; ci < col_16div; ci += C16NUM) {
size_t col_offset = col; size_t col_offset = (size_t)col;
int8_t *src_c = src_r + ci; int8_t *src_c = src_r + ci;
int8_t *dst_c = dst_r + ci * C4NUM; int8_t *dst_c = dst_r + ci * C4NUM;
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
@ -207,7 +207,7 @@ void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
int c2div = c / C2NUM, c2mod = c % C2NUM; int c2div = c / C2NUM, c2mod = c % C2NUM;
size_t ci = r * stride + c; size_t ci = r * stride + c;
int32_t value = 0; int32_t value = 0;
for (int d = 0; d < deep_16; d++) { for (int d = 0; d < (int)deep_16; d++) {
int d16div = d / C16NUM, d16mod = d % C16NUM; int d16div = d / C16NUM, d16mod = d % C16NUM;
size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod; size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod;
size_t bi = c2div * deep_16 * C2NUM + d16div * C2NUM * C16NUM + c2mod * C16NUM + d16mod; size_t bi = c2div * deep_16 * C2NUM + d16div * C2NUM * C16NUM + c2mod * C16NUM + d16mod;
@ -269,9 +269,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c
#endif #endif
void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
size_t per_channel) { int32_t maxi, size_t per_channel) {
/* row8x4-major * row4x8-major => (int8)row-major */ /* row8x4-major * row4x8-major => (int8)row-major */
for (int r = 0; r < row; r++) { for (int r = 0; r < row; r++) {
for (int c = 0; c < col; c++) { for (int c = 0; c < col; c++) {
@ -279,7 +279,7 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
int c8div = c / C8NUM, c8mod = c % C8NUM; int c8div = c / C8NUM, c8mod = c % C8NUM;
size_t ci = r * stride + c; size_t ci = r * stride + c;
int32_t value = 0; int32_t value = 0;
for (int d = 0; d < deep_4; d++) { for (int d = 0; d < (int)deep_4; d++) {
int d4div = d / C4NUM, d4mod = d % C4NUM; int d4div = d / C4NUM, d4mod = d % C4NUM;
size_t ai = r8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + r8mod * C4NUM + d4mod; size_t ai = r8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + r8mod * C4NUM + d4mod;
size_t bi = c8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + c8mod * C4NUM + d4mod; size_t bi = c8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + c8mod * C4NUM + d4mod;
@ -302,9 +302,9 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
} }
void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
size_t per_channel, int32_t *filter_zp) { int32_t maxi, size_t per_channel, const int32_t *filter_zp) {
/* row4x4-major * row4x16-major => (int8)row-major */ /* row4x4-major * row4x16-major => (int8)row-major */
for (int r = 0; r < row; r++) { for (int r = 0; r < row; r++) {
for (int c = 0; c < col; c++) { for (int c = 0; c < col; c++) {
@ -312,7 +312,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row
int c16div = c / C16NUM, c16mod = c % C16NUM; int c16div = c / C16NUM, c16mod = c % C16NUM;
size_t ci = r * stride + c; size_t ci = r * stride + c;
int32_t value = 0; int32_t value = 0;
for (int d = 0; d < deep_4; d++) { for (int d = 0; d < (int)deep_4; d++) {
int d4div = d / C4NUM, d4mod = d % C4NUM; int d4div = d / C4NUM, d4mod = d % C4NUM;
size_t ai = r4div * deep_4 * C4NUM + d4div * C4NUM * C4NUM + r4mod * C4NUM + d4mod; size_t ai = r4div * deep_4 * C4NUM + d4div * C4NUM * C4NUM + r4mod * C4NUM + d4mod;
size_t bi = c16div * deep_4 * C16NUM + d4div * C16NUM * C4NUM + c16mod * C4NUM + d4mod; size_t bi = c16div * deep_4 * C16NUM + d4div * C16NUM * C4NUM + c16mod * C4NUM + d4mod;
@ -453,7 +453,7 @@ void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input,
#else #else
int32_t tmp_sum_value[4] = {0}; int32_t tmp_sum_value[4] = {0};
for (int ici = 0; ici < ic_4div; ici += C4NUM) { for (int ici = 0; ici < ic_4div; ici += C4NUM) {
for (int i = 0; i < C4NUM; i++) { for (size_t i = 0; i < C4NUM; i++) {
tmp_sum_value[i] += src_ic[0 + i * input_channel]; tmp_sum_value[i] += src_ic[0 + i * input_channel];
tmp_sum_value[i] += src_ic[1 + i * input_channel]; tmp_sum_value[i] += src_ic[1 + i * input_channel];
tmp_sum_value[i] += src_ic[2 + i * input_channel]; tmp_sum_value[i] += src_ic[2 + i * input_channel];

View File

@ -42,9 +42,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c
/* optimize conv */ /* optimize conv */
void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
size_t per_channel); int32_t maxi, size_t per_channel);
/* 4x16 16x2 -> 4x2 */ /* 4x16 16x2 -> 4x2 */
/* arm32 conv1x1 */ /* arm32 conv1x1 */
@ -61,9 +61,9 @@ void RowMajor2Row4x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum,
size_t input_channel, size_t plane_size, int32_t filter_zp); size_t input_channel, size_t plane_size, int32_t filter_zp);
void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
size_t per_channel, int32_t *filter_zp); int32_t maxi, size_t per_channel, const int32_t *filter_zp);
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums, void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums,

View File

@ -27,10 +27,10 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i
return vqmovn_s32(raw_sum); return vqmovn_s32(raw_sum);
} }
void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
MulQuantArg *quant_arg, int *index) { const MulQuantArg *quant_arg, int *index) {
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_);
int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_);
@ -104,8 +104,8 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data,
} }
#endif #endif
void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth,
bool input1_broad, MulQuantArg *quant_arg) { int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg) {
// input0 need broadcast // input0 need broadcast
int32_t zp1 = quant_arg->in_quant_args_[0].zp_; int32_t zp1 = quant_arg->in_quant_args_[0].zp_;
int32_t zp2 = quant_arg->in_quant_args_[1].zp_; int32_t zp2 = quant_arg->in_quant_args_[1].zp_;
@ -215,8 +215,8 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int
return; return;
} }
void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
MulQuantArg *quant_arg) { const MulQuantArg *quant_arg) {
int index = 0; int index = 0;
#ifdef ENABLE_NEON #ifdef ENABLE_NEON
MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index); MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index);

View File

@ -28,9 +28,10 @@
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg); void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, const MulQuantArg *quant_arg);
bool input1_broad, MulQuantArg *quant_arg); void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth,
int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -849,7 +849,8 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara
} }
} }
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) { void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data,
const ConvParameter *conv_param) {
// origin weight format : ohwi // origin weight format : ohwi
int input_channel = conv_param->input_channel_; int input_channel = conv_param->input_channel_;
int ic8 = input_channel / C8NUM * C8NUM; int ic8 = input_channel / C8NUM * C8NUM;

View File

@ -40,7 +40,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, const int32_t
const ConvParameter *conv_param); const ConvParameter *conv_param);
void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16); void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16);
void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param); void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param);
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param); void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, const ConvParameter *conv_param);
void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num, void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num,
int block_index, const int32_t *filter_zp, int32_t *input_sum, int block_index, const int32_t *filter_zp, int32_t *input_sum,
const ConvParameter *conv_param, bool per_channel, bool is_optimize); const ConvParameter *conv_param, bool per_channel, bool is_optimize);

View File

@ -26,7 +26,7 @@ int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dim
for (int w = 0; w < in_dims[2]; w++) { for (int w = 0; w < in_dims[2]; w++) {
const int8_t *in = in_data + Offset(in_dims, n, h, w, 0); const int8_t *in = in_data + Offset(in_dims, n, h, w, 0);
int8_t *out = out_data + Offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]); int8_t *out = out_data + Offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
memcpy(out, in, copy_size * sizeof(int8_t)); memcpy(out, in, (size_t)copy_size * sizeof(int8_t));
} }
} }
} }

View File

@ -112,7 +112,7 @@ int UInt8ToInt8(const uint8_t *real_values, int8_t *quant_values, int size) {
} }
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
int temp = real_values[i] - 128; int temp = (int)real_values[i] - 128;
if (temp > 127) { if (temp > 127) {
quant_values[i] = 127; quant_values[i] = 127;
} else if (temp < -128) { } else if (temp < -128) {

View File

@ -34,8 +34,8 @@ int16x4_t ClacSumHalfWordMul3(int32x4_t scaled_input0, int32x4_t scaled_input1,
const ScaleParameter *scale_param) { const ScaleParameter *scale_param) {
int32x4_t output_multiplier_vec = vdupq_n_s32(scale_param->scale_mul_arg_.multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(scale_param->scale_mul_arg_.multiplier_);
int32x4_t output_multiplier_vec2 = vdupq_n_s32(scale_param->offset_mul_arg_.multiplier_); int32x4_t output_multiplier_vec2 = vdupq_n_s32(scale_param->offset_mul_arg_.multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << scale_param->scale_mul_arg_.left_shift_); int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)(scale_param->scale_mul_arg_.left_shift_));
int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << scale_param->offset_mul_arg_.left_shift_); int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << (size_t)(scale_param->offset_mul_arg_.left_shift_));
int32x4_t input_scale = vmulq_s32(scaled_input0, scaled_input1); int32x4_t input_scale = vmulq_s32(scaled_input0, scaled_input1);
int32x4_t raw_sum = RoundingDivideByPOTInt32x4( int32x4_t raw_sum = RoundingDivideByPOTInt32x4(
SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec), SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec),

View File

@ -24,7 +24,7 @@
#ifdef ENABLE_NEON #ifdef ENABLE_NEON
int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec, int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec,
int32x4_t output_multiplier_vec, SubQuantArg *para) { int32x4_t output_multiplier_vec, const SubQuantArg *para) {
int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1); int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1);
raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec), raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec),
@ -35,14 +35,14 @@ int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, in
return vqmovn_s32(raw_data); return vqmovn_s32(raw_data);
} }
void SubInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, void SubInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
SubQuantArg *para, int *index) { const SubQuantArg *para, int *index) {
int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_); int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_);
int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_); int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_);
int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_); int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_);
int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_); int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_);
int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32((1 << para->left_shift_out_)); int32x4_t left_shift_out_vec = vdupq_n_s32((1 << (size_t)para->left_shift_out_));
int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_); int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_);
int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_); int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_);

View File

@ -226,16 +226,16 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu
const int *strides = transpose_param->strides_; const int *strides = transpose_param->strides_;
const int *out_strides = transpose_param->out_strides_; const int *out_strides = transpose_param->out_strides_;
int num_axes = transpose_param->num_axes_; int num_axes = transpose_param->num_axes_;
size_t data_size = (*out_strides) * output_shape[0]; size_t data_size = (size_t)((*out_strides) * output_shape[0]);
size_t offset_size = UP_DIV(data_size, thread_num); size_t offset_size = UP_DIV(data_size, thread_num);
size_t task_offset = offset_size * task_id; size_t task_offset = offset_size * task_id;
int count = data_size - task_offset; size_t count = data_size - task_offset;
if (count <= 0) { if (data_size < task_offset) {
return; return;
} }
count = MSMIN(offset_size, count); count = MSMIN(offset_size, count);
for (size_t idx = task_offset; idx < task_offset + count; ++idx) { for (size_t idx = task_offset; idx < task_offset + count; ++idx) {
int pos = idx; int pos = (int)idx;
int output_idx = 0; int output_idx = 0;
int input_idx = 0; int input_idx = 0;
for (int i = 0; i < num_axes; ++i) { for (int i = 0; i < num_axes; ++i) {

View File

@ -24,7 +24,7 @@ int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParamete
float input_scale = para_->quant_arg.in_quant_args_.scale_; float input_scale = para_->quant_arg.in_quant_args_.scale_;
int8_t input_zp = para_->quant_arg.in_quant_args_.zp_; int8_t input_zp = para_->quant_arg.in_quant_args_.zp_;
for (int i = task_id; i < data_size; i += para_->thread_count_) { for (int i = task_id; i < (int)data_size; i += para_->thread_count_) {
output_ptr[i] = output_zp + round(1 / output_scale * input_scale * (input_ptr[i] - input_zp)); output_ptr[i] = output_zp + round(1 / output_scale * input_scale * (input_ptr[i] - input_zp));
} }
return 0; return 0;

View File

@ -23,14 +23,15 @@ typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int *dst, i
const int *input_sum, const int *bias); const int *input_sum, const int *bias);
typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel); int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel, int *filter_zp); int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int *filter_zp);
typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } OutType; typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } OutType;

View File

@ -165,7 +165,7 @@ int Conv2DINT8Coder::InitWeightBias(CoderContext *const context) {
} }
int Conv2DINT8Coder::Prepare(CoderContext *const context) { int Conv2DINT8Coder::Prepare(CoderContext *const context) {
Conv2DBaseCoder::Init(); MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed.");
CheckSupportOptimize(); CheckSupportOptimize();
MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed!"); MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed!");
MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed."); MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed.");

View File

@ -24,7 +24,7 @@
namespace mindspore::lite::micro { namespace mindspore::lite::micro {
int ConvolutionDepthwiseINT8Coder::Prepare(CoderContext *const context) { int ConvolutionDepthwiseINT8Coder::Prepare(CoderContext *const context) {
Conv2DBaseCoder::Init(); MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed.");
// init sliding window param // init sliding window param
MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed."); MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed.");
MS_CHECK_RET_CODE(InitWeightBias(context), "dwconvolution do init weightbais failed"); MS_CHECK_RET_CODE(InitWeightBias(context), "dwconvolution do init weightbais failed");

View File

@ -69,7 +69,7 @@ int ReduceInt8Coder::CalculateQuantArgs() {
QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift); QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
qm->left_shift_ = shift < 0 ? -shift : 0; qm->left_shift_ = shift < 0 ? -shift : 0;
qm->right_shift_ = shift > 0 ? shift : 0; qm->right_shift_ = shift > 0 ? shift : 0;
mean_multipliers_.push_back(qm); prod_multipliers_.push_back(qm);
} }
} }

View File

@ -30,7 +30,21 @@ class ReduceInt8Coder final : public ReduceBaseCoder {
const Model::Node *node, size_t node_index, Target target) const Model::Node *node, size_t node_index, Target target)
: ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {} : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
~ReduceInt8Coder() override { begin_src_data_ = nullptr; } ~ReduceInt8Coder() override {
begin_src_data_ = nullptr;
for (auto &arg : mean_multipliers_) {
delete arg;
arg = nullptr;
}
for (auto &arg : prod_multipliers_) {
delete arg;
arg = nullptr;
}
for (auto &arg : sum_square_multipliers_) {
delete arg;
arg = nullptr;
}
}
int Prepare(CoderContext *const context) override; int Prepare(CoderContext *const context) override;
int DoCode(CoderContext *const context) override; int DoCode(CoderContext *const context) override;

View File

@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_Softmax;
namespace mindspore::lite::micro::nnacl { namespace mindspore::lite::micro::nnacl {
int SoftMaxInt8Coder::Prepare(CoderContext *const context) { int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
SoftmaxBaseCoder::Init(); MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "Softmax base init failed.");
std::vector<LiteQuantParam> in_quant_args = input_tensor_->quant_params(); std::vector<LiteQuantParam> in_quant_args = input_tensor_->quant_params();
quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale; quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale;
quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint; quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint;
@ -59,8 +59,7 @@ int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
sum_data_size_ = inner_size * sizeof(int); sum_data_size_ = inner_size * sizeof(int);
sum_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace)); sum_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace));
MS_CHECK_PTR(sum_data_); MS_CHECK_PTR(sum_data_);
ReSize(); return ReSize();
return RET_OK;
} }
int SoftMaxInt8Coder::DoCode(CoderContext *const context) { int SoftMaxInt8Coder::DoCode(CoderContext *const context) {

View File

@ -20,11 +20,12 @@ extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, in
const int *input_sum, const int *bias); const int *input_sum, const int *bias);
extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, const int *multiplier, const int *left_shift, const int *right_shift, int row, int col,
size_t peroc); int stride, size_t peroc);
extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); const int *multiplier, const int *left_shift, const int *right_shift, size_t stride,
size_t peroc, const int *filter_zp);
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
@ -33,16 +34,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
} }
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel) { int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) {
return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
} }
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel, int32_t *filter_zp) { int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp) {
return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
right_shift, stride, per_channel, filter_zp); right_shift, stride, per_channel, filter_zp);
} }

View File

@ -29,13 +29,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
const int *input_sum, const int *bias); const int *input_sum, const int *bias);
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel); int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel, int32_t *filter_zp); int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp);
#endif #endif
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ #endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_

View File

@ -35,7 +35,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
memset(packed_weight_, 0, size); memset(packed_weight_, 0, size);
RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel); RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel);
/* bias */ /* bias */
size = UP_ROUND(output_channel, C2NUM); size = (size_t)UP_ROUND(output_channel, C2NUM);
int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t)); int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t));
if (bias_data_ == NULL) { if (bias_data_ == NULL) {
free(packed_weight_); free(packed_weight_);
@ -43,7 +43,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
} }
memset(bias_data_, 0, size * sizeof(int32_t)); memset(bias_data_, 0, size * sizeof(int32_t));
if (src_bias != NULL) { if (src_bias != NULL) {
memcpy(bias_data_, src_bias, output_channel * sizeof(int32_t)); memcpy(bias_data_, src_bias, (size_t)output_channel * sizeof(int32_t));
} }
#else #else
/* InitWeightBias */ /* InitWeightBias */

View File

@ -42,7 +42,7 @@ class ArithmeticInt8CPUKernel : public InnerKernel {
int8_t *tile_data0_{nullptr}; int8_t *tile_data0_{nullptr};
int8_t *tile_data1_{nullptr}; int8_t *tile_data1_{nullptr};
ArithmeticRunInt8 arithmetic_run_{nullptr}; ArithmeticRunInt8 arithmetic_run_{nullptr};
ArithmeticQuantArg quant_args_; ArithmeticQuantArg quant_args_ = {};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_

View File

@ -48,12 +48,12 @@ int BatchnormInt8CPUKernel::InitConstTensor() {
auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData()); auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData()); auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());
alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float))); alpha_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(mean->ElementsNum()) * sizeof(float)));
if (alpha_addr_ == nullptr) { if (alpha_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
} }
beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float))); beta_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(variance->ElementsNum()) * sizeof(float)));
if (beta_addr_ == nullptr) { if (beta_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
@ -92,12 +92,12 @@ int BatchnormInt8CPUKernel::InitFusedConstTensor() {
auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData()); auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData()); auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());
alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float))); alpha_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(mean->ElementsNum()) * sizeof(float)));
if (alpha_addr_ == nullptr) { if (alpha_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
} }
beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float))); beta_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(variance->ElementsNum()) * sizeof(float)));
if (beta_addr_ == nullptr) { if (beta_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;

View File

@ -59,11 +59,12 @@ int ConcatInt8CPUKernel::Init() {
} }
int ConcatInt8CPUKernel::ReSize() { int ConcatInt8CPUKernel::ReSize() {
concat_param_->axis_ = concat_param_->axis_ = concat_param_->axis_ >= 0
concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_; ? concat_param_->axis_
: static_cast<int>(in_tensors_.front()->shape().size()) + concat_param_->axis_;
auto input_num = in_tensors_.size(); auto input_num = in_tensors_.size();
concat_param_->input_num_ = input_num; concat_param_->input_num_ = static_cast<int>(input_num);
concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num)); concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num));
if (concat_param_->input_shapes_ == nullptr) { if (concat_param_->input_shapes_ == nullptr) {
MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed."; MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed.";
@ -97,7 +98,7 @@ int ConcatInt8CPUKernel::ReSize() {
memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(), memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(),
sizeof(int) * output_dim); sizeof(int) * output_dim);
for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) { for (size_t i = static_cast<size_t>(concat_param_->axis_ + 1); i < output_dim; i++) {
after_axis_size *= concat_param_->output_shapes_[i]; after_axis_size *= concat_param_->output_shapes_[i];
} }
concat_param_->after_axis_size = after_axis_size; concat_param_->after_axis_size = after_axis_size;
@ -122,21 +123,17 @@ int ConcatInt8CPUKernel::Run() {
int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto concat = reinterpret_cast<ConcatInt8CPUKernel *>(cdata); auto concat = reinterpret_cast<ConcatInt8CPUKernel *>(cdata);
auto ret = concat->DoExecute(task_id); concat->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConcatInt8Run task_id " << task_id << " failed.";
return ret;
}
return lite::RET_OK; return lite::RET_OK;
} }
int ConcatInt8CPUKernel::DoExecute(int task_id) { void ConcatInt8CPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_); int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) { if (real_dst_count <= 0) {
return lite::RET_OK; return;
} }
Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id); Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id);
return lite::RET_OK; return;
} }
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator<ConcatInt8CPUKernel>) REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator<ConcatInt8CPUKernel>)

View File

@ -57,7 +57,7 @@ class ConcatInt8CPUKernel : public InnerKernel {
int Init() override; int Init() override;
int ReSize() override; int ReSize() override;
int Run() override; int Run() override;
int DoExecute(int task_id); void DoExecute(int task_id);
private: private:
int64_t before_axis_size = 0; int64_t before_axis_size = 0;

View File

@ -25,7 +25,7 @@ namespace mindspore::kernel {
namespace { namespace {
constexpr size_t kUnitBufferMultipler = 4 * 4; constexpr size_t kUnitBufferMultipler = 4 * 4;
} // namespace } // namespace
int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param) {
auto input_channel = conv_param->input_channel_; auto input_channel = conv_param->input_channel_;
auto output_channel = conv_param->output_channel_; auto output_channel = conv_param->output_channel_;
auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
@ -116,7 +116,7 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() {
memset(bias_data_, 0, new_bias_size); memset(bias_data_, 0, new_bias_size);
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
auto ori_bias_addr = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->MutableData()); auto ori_bias_addr = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->MutableData());
memcpy(bias_data_, ori_bias_addr, output_channel * sizeof(int32_t)); memcpy(bias_data_, ori_bias_addr, static_cast<size_t>(output_channel) * sizeof(int32_t));
} else { } else {
MS_ASSERT(in_tensors_.size() == kInputSize1); MS_ASSERT(in_tensors_.size() == kInputSize1);
} }

View File

@ -46,7 +46,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
int32_t *tmp_dst_buffer_ = nullptr; int32_t *tmp_dst_buffer_ = nullptr;
int8_t *tmp_out_ = nullptr; int8_t *tmp_out_ = nullptr;
}; };
int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param);
} // namespace mindspore::kernel } // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_ #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_

View File

@ -60,13 +60,13 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(), PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(),
weight_tensor->Batch()); weight_tensor->Batch());
packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
if (packed_weight_ == nullptr) { if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
free(tmp_weight); free(tmp_weight);
return RET_ERROR; return RET_ERROR;
} }
bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
if (filter_per_channel) { if (filter_per_channel) {
for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) { for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
for (int c = 0; c < channel; c++) { for (int c = 0; c < channel; c++) {
@ -87,16 +87,16 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
} }
free(tmp_weight); free(tmp_weight);
bias_data_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
if (bias_data_ == nullptr) { if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
} }
memset(bias_data_, 0, channel * sizeof(int32_t)); memset(bias_data_, 0, static_cast<size_t>(channel) * sizeof(int32_t));
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
auto bias_tensor = in_tensors_.at(kBiasIndex); auto bias_tensor = in_tensors_.at(kBiasIndex);
auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData()); auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t)); memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
} }
return RET_OK; return RET_OK;
} }
@ -153,7 +153,8 @@ int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() { int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() {
int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_; int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_;
buffer_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(buffer_size * sizeof(int8_t))); buffer_ =
reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(buffer_size) * sizeof(int8_t)));
if (buffer_ == nullptr) { if (buffer_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;

View File

@ -55,7 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
return RET_ERROR; return RET_ERROR;
} }
bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
if (filter_per_channel) { if (filter_per_channel) {
for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) { for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
for (int c = 0; c < channel; c++) { for (int c = 0; c < channel; c++) {

View File

@ -42,7 +42,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->MutableData()); auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->MutableData());
int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
if (packed_weight_ == nullptr) { if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
@ -50,16 +50,16 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
weight_tensor->Batch(), &(conv_param_->conv_quant_arg_)); weight_tensor->Batch(), &(conv_param_->conv_quant_arg_));
bias_data_ = reinterpret_cast<int32_t *>(malloc(C8NUM * OC8 * sizeof(int32_t))); bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(C8NUM * OC8) * sizeof(int32_t)));
if (bias_data_ == nullptr) { if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
} }
memset(bias_data_, 0, C8NUM * OC8 * sizeof(int32_t)); memset(bias_data_, 0, static_cast<size_t>(C8NUM * OC8) * sizeof(int32_t));
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
auto bias_tensor = in_tensors_.at(kBiasIndex); auto bias_tensor = in_tensors_.at(kBiasIndex);
auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData()); auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t)); memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
} }
conv_param_->thread_num_ = MSMIN(thread_count_, OC8); conv_param_->thread_num_ = MSMIN(thread_count_, OC8);
@ -72,7 +72,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM *
UP_DIV(conv_param_->input_channel_, C8NUM); UP_DIV(conv_param_->input_channel_, C8NUM);
packed_input_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_input_size * sizeof(int8_t))); packed_input_ =
reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(pack_input_size) * sizeof(int8_t)));
if (packed_input_ == nullptr) { if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
@ -80,7 +81,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM *
UP_DIV(conv_param_->output_channel_, C8NUM); UP_DIV(conv_param_->output_channel_, C8NUM);
packed_output_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); packed_output_ = reinterpret_cast<int8_t *>(
ms_context_->allocator->Malloc(static_cast<size_t>(pack_output_size) * sizeof(int8_t)));
if (packed_output_ == nullptr) { if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed."; MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR; return RET_ERROR;
@ -150,10 +152,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
auto input_tensor = in_tensors_.at(kInputIndex); auto input_tensor = in_tensors_.at(kInputIndex);
auto channel = conv_param_->input_channel_; auto channel = conv_param_->input_channel_;
input_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float))); input_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
MSLITE_CHECK_PTR(input_scale_); MSLITE_CHECK_PTR(input_scale_);
input_zp_ = reinterpret_cast<int8_t *>(malloc(channel * sizeof(int8_t))); input_zp_ = reinterpret_cast<int8_t *>(malloc(static_cast<size_t>(channel) * sizeof(int8_t)));
MSLITE_CHECK_PTR(input_zp_); MSLITE_CHECK_PTR(input_zp_);
if (input_tensor->quant_params().size() == kPerTensor) { if (input_tensor->quant_params().size() == kPerTensor) {
@ -171,10 +173,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
} }
auto output_tensor = out_tensors_.at(kOutputIndex); auto output_tensor = out_tensors_.at(kOutputIndex);
output_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float))); output_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
MSLITE_CHECK_PTR(output_scale_); MSLITE_CHECK_PTR(output_scale_);
output_zp_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); output_zp_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(output_zp_); MSLITE_CHECK_PTR(output_zp_);
if (output_tensor->quant_params().size() == kPerTensor) { if (output_tensor->quant_params().size() == kPerTensor) {
@ -191,25 +193,26 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
} }
} }
conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(channel * sizeof(double))); conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(static_cast<size_t>(channel) * sizeof(double)));
MSLITE_CHECK_PTR(conv_quant_arg_->real_multiplier_); MSLITE_CHECK_PTR(conv_quant_arg_->real_multiplier_);
conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->left_shift_); MSLITE_CHECK_PTR(conv_quant_arg_->left_shift_);
conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->right_shift_); MSLITE_CHECK_PTR(conv_quant_arg_->right_shift_);
conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); conv_quant_arg_->quant_multiplier_ =
reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->quant_multiplier_); MSLITE_CHECK_PTR(conv_quant_arg_->quant_multiplier_);
conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->out_act_min_); MSLITE_CHECK_PTR(conv_quant_arg_->out_act_min_);
conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t))); conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->out_act_max_); MSLITE_CHECK_PTR(conv_quant_arg_->out_act_max_);
weight_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float))); weight_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
MSLITE_CHECK_PTR(weight_scale_); MSLITE_CHECK_PTR(weight_scale_);
auto weight_tensor = in_tensors_.at(kWeightIndex); auto weight_tensor = in_tensors_.at(kWeightIndex);

View File

@ -98,12 +98,12 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
memset(bias_data_, 0, bias_size); memset(bias_data_, 0, bias_size);
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
auto ori_bias = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->data_c()); auto ori_bias = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->data_c());
memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t)); memcpy(bias_data_, ori_bias, static_cast<size_t>(output_channel) * sizeof(int32_t));
} else { } else {
MS_ASSERT(in_tensors_.size() == kInputSize1); MS_ASSERT(in_tensors_.size() == kInputSize1);
} }
auto *bias_data = reinterpret_cast<int32_t *>(bias_data_); auto *bias_data = reinterpret_cast<int32_t *>(bias_data_);
bool filter_peroc = conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL; bool filter_peroc = static_cast<bool>(conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL);
if (filter_peroc) { if (filter_peroc) {
filter_zp_ptr_ = reinterpret_cast<int32_t *>(malloc(output_channel * sizeof(int32_t))); filter_zp_ptr_ = reinterpret_cast<int32_t *>(malloc(output_channel * sizeof(int32_t)));
if (filter_zp_ptr_ == nullptr) { if (filter_zp_ptr_ == nullptr) {
@ -126,9 +126,9 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
size_t input_sum_size; size_t input_sum_size;
if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) { if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
input_sum_size = up_round_oc * tile_num_ * thread_count_ * sizeof(int32_t); input_sum_size = static_cast<size_t>(up_round_oc * tile_num_ * thread_count_) * sizeof(int32_t);
} else { } else {
input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t); input_sum_size = static_cast<size_t>(tile_num_ * thread_count_) * sizeof(int32_t);
} }
input_sum_ = reinterpret_cast<int32_t *>(malloc(input_sum_size)); input_sum_ = reinterpret_cast<int32_t *>(malloc(input_sum_size));
if (input_sum_ == nullptr) { if (input_sum_ == nullptr) {

View File

@ -57,21 +57,16 @@ int CropInt8CPUKernel::Run() {
int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto crop = reinterpret_cast<CropInt8CPUKernel *>(cdata); auto crop = reinterpret_cast<CropInt8CPUKernel *>(cdata);
auto ret = crop->DoExecute(task_id); crop->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "CropInt8Run task id " << task_id << " run failed.";
return ret;
}
return RET_OK; return RET_OK;
} }
int CropInt8CPUKernel::DoExecute(int task_id) { void CropInt8CPUKernel::DoExecute(int task_id) {
auto input_tensor = in_tensors_.at(kInputIndex); auto input_tensor = in_tensors_.at(kInputIndex);
auto out_tensor = out_tensors_.at(kOutputIndex); auto out_tensor = out_tensors_.at(kOutputIndex);
int8_t *input_data = reinterpret_cast<int8_t *>(input_tensor->data_c()); int8_t *input_data = reinterpret_cast<int8_t *>(input_tensor->data_c());
int8_t *output_data = reinterpret_cast<int8_t *>(out_tensor->data_c()); int8_t *output_data = reinterpret_cast<int8_t *>(out_tensor->data_c());
Int8Crop(input_data, output_data, task_id, crop_para_); Int8Crop(input_data, output_data, task_id, crop_para_);
return RET_OK;
} }
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, LiteKernelCreator<CropInt8CPUKernel>) REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, LiteKernelCreator<CropInt8CPUKernel>)

View File

@ -36,7 +36,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel {
int Init() override; int Init() override;
int ReSize() override; int ReSize() override;
int Run() override; int Run() override;
int DoExecute(int task_id); void DoExecute(int task_id);
}; };
int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale); int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);

View File

@ -57,7 +57,7 @@ int GatherNdInt8CPUKernel::ReSize() {
auto indices_tensor = in_tensors_.at(1); auto indices_tensor = in_tensors_.at(1);
auto indices_shape = indices_tensor->shape(); auto indices_shape = indices_tensor->shape();
int indices_rank = indices_shape.size(); int indices_rank = static_cast<size_t>(indices_shape.size());
count_ = 1; count_ = 1;
for (int i = 0; i < indices_rank - 1; ++i) { for (int i = 0; i < indices_rank - 1; ++i) {
count_ *= indices_shape[i]; count_ *= indices_shape[i];
@ -66,12 +66,12 @@ int GatherNdInt8CPUKernel::ReSize() {
MS_LOG(ERROR) << "count_ is invalid, count_: " << count_; MS_LOG(ERROR) << "count_ is invalid, count_: " << count_;
return RET_ERROR; return RET_ERROR;
} }
in_offset_ = reinterpret_cast<int *>(malloc(count_ * sizeof(int))); in_offset_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(count_) * sizeof(int)));
if (in_offset_ == nullptr) { if (in_offset_ == nullptr) {
MS_LOG(ERROR) << "GatherNdInt8 Malloc in_offset_ error!"; MS_LOG(ERROR) << "GatherNdInt8 Malloc in_offset_ error!";
return RET_ERROR; return RET_ERROR;
} }
(void)memset(in_offset_, 0, count_ * sizeof(int)); (void)memset(in_offset_, 0, static_cast<size_t>(count_) * sizeof(int));
thread_sz_count_ = MSMIN(thread_count_, count_); thread_sz_count_ = MSMIN(thread_count_, count_);
if (thread_sz_count_ == 0) { if (thread_sz_count_ == 0) {
MS_LOG(ERROR) << "div zero"; MS_LOG(ERROR) << "div zero";
@ -85,9 +85,9 @@ int GatherNdInt8CPUKernel::InitOffset() {
auto ind_quant_args = in_tensors_.at(1)->quant_params(); auto ind_quant_args = in_tensors_.at(1)->quant_params();
auto indices_tensor = in_tensors_.at(1); auto indices_tensor = in_tensors_.at(1);
auto indices_shape = indices_tensor->shape(); auto indices_shape = indices_tensor->shape();
int indices_rank = indices_shape.size(); int indices_rank = static_cast<size_t>(indices_shape.size());
auto in_shape = in_tensors_.front()->shape(); auto in_shape = in_tensors_.front()->shape();
int in_rank = in_shape.size(); int in_rank = static_cast<size_t>(in_shape.size());
if (indices_rank < 1) { if (indices_rank < 1) {
MS_LOG(ERROR) << "inex out of bounds"; MS_LOG(ERROR) << "inex out of bounds";
return RET_ERROR; return RET_ERROR;

View File

@ -44,7 +44,7 @@ class GatherNdInt8CPUKernel : public InnerKernel {
int *in_offset_ = nullptr; int *in_offset_ = nullptr;
int8_t *in_ptr_ = nullptr; int8_t *in_ptr_ = nullptr;
int8_t *out_ptr_ = nullptr; int8_t *out_ptr_ = nullptr;
GatherQuantArg param_; GatherQuantArg param_ = {};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -29,7 +29,7 @@ int GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) {
int8_t *src_ptr = reinterpret_cast<int8_t *>(ori_in_data_) + group_id * sub_in_channel; int8_t *src_ptr = reinterpret_cast<int8_t *>(ori_in_data_) + group_id * sub_in_channel;
int8_t *dst_ptr = sub_in_data; int8_t *dst_ptr = sub_in_data;
for (int i = 0; i < in_plane; ++i) { for (int i = 0; i < in_plane; ++i) {
memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(int8_t)); memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_in_channel) * sizeof(int8_t));
src_ptr += ori_in_channel; src_ptr += ori_in_channel;
dst_ptr += sub_in_channel; dst_ptr += sub_in_channel;
} }
@ -45,7 +45,7 @@ int GroupConvolutionInt8CPUKernel::PostConcat(int group_id) {
int8_t *src_ptr = sub_out_data; int8_t *src_ptr = sub_out_data;
int8_t *dst_ptr = reinterpret_cast<int8_t *>(ori_out_data_) + group_id * sub_out_channel; int8_t *dst_ptr = reinterpret_cast<int8_t *>(ori_out_data_) + group_id * sub_out_channel;
for (int i = 0; i < out_plane; ++i) { for (int i = 0; i < out_plane; ++i) {
memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(int8_t)); memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_out_channel) * sizeof(int8_t));
src_ptr += sub_out_channel; src_ptr += sub_out_channel;
dst_ptr += ori_out_channel; dst_ptr += ori_out_channel;
} }

View File

@ -37,7 +37,7 @@ class HswishInt8CPUKernel : public InnerKernel {
private: private:
int thread_count_; int thread_count_;
HswishQuantArg quant_arg_; HswishQuantArg quant_arg_ = {};
void MultiplierInt32ToInt16(int32_t input, int16_t *output) const; void MultiplierInt32ToInt16(int32_t input, int16_t *output) const;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -39,7 +39,7 @@ class LeakyReluInt8CPUKernel : public InnerKernel {
int DoExecute(int task_id); int DoExecute(int task_id);
private: private:
LeakyReluQuantArg quant_prelu_parm_; LeakyReluQuantArg quant_prelu_parm_ = {};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -187,29 +187,21 @@ int MulInt8CPUKernel::Run() {
int FastHWBroadcastMulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int FastHWBroadcastMulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata); auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata);
auto ret = mul->FastDoExecute(task_id); mul->FastDoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "FastHWBroadcastMulInt8Run task_id " << task_id << " failed.";
return ret;
}
return lite::RET_OK; return lite::RET_OK;
} }
int MulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int MulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata); auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata);
auto ret = mul->DoExecute(task_id); mul->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "MulInt8Run task_id " << task_id << " failed.";
return ret;
}
return lite::RET_OK; return lite::RET_OK;
} }
int MulInt8CPUKernel::FastDoExecute(int task_id) { void MulInt8CPUKernel::FastDoExecute(int task_id) {
int depth = out_tensors_.front()->Channel(); int depth = out_tensors_.front()->Channel();
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) { if (real_dst_count <= 0) {
return lite::RET_OK; return;
} }
int8_t *cur_input0_data = input0_data_; int8_t *cur_input0_data = input0_data_;
int8_t *cur_input1_data = input1_data_ + task_id * count_unit_ * depth; int8_t *cur_input1_data = input1_data_ + task_id * count_unit_ * depth;
@ -219,20 +211,19 @@ int MulInt8CPUKernel::FastDoExecute(int task_id) {
cur_input1_data = input0_data_ + task_id * count_unit_ * depth; cur_input1_data = input0_data_ + task_id * count_unit_ * depth;
} }
FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_); FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_);
return RET_OK;
} }
int MulInt8CPUKernel::DoExecute(int task_id) { void MulInt8CPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) { if (real_dst_count <= 0) {
return lite::RET_OK; return;
} }
int8_t *cur_input0_data = input0_data_ + task_id * count_unit_; int8_t *cur_input0_data = input0_data_ + task_id * count_unit_;
int8_t *cur_input1_data = input1_data_ + task_id * count_unit_; int8_t *cur_input1_data = input1_data_ + task_id * count_unit_;
int8_t *cur_output_data = output_data_ + task_id * count_unit_; int8_t *cur_output_data = output_data_ + task_id * count_unit_;
Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_); Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_);
return lite::RET_OK; return;
} }
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MulFusion, LiteKernelCreator<MulInt8CPUKernel>) REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MulFusion, LiteKernelCreator<MulInt8CPUKernel>)

View File

@ -39,8 +39,8 @@ class MulInt8CPUKernel : public InnerKernel {
void CheckSameShapeSize(std::vector<int> in_tensor0_shape, std::vector<int> in_tensor1_shape); void CheckSameShapeSize(std::vector<int> in_tensor0_shape, std::vector<int> in_tensor1_shape);
void CheckIfFastImpl(); void CheckIfFastImpl();
int Run() override; int Run() override;
int DoExecute(int task_id); void DoExecute(int task_id);
int FastDoExecute(int task_id); void FastDoExecute(int task_id);
private: private:
const lite::InnerContext *ctx_ = nullptr; const lite::InnerContext *ctx_ = nullptr;

View File

@ -30,16 +30,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
} }
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel) { int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) {
return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
} }
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel, int32_t *filter_zp) { int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp) {
return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
right_shift, stride, per_channel, filter_zp); right_shift, stride, per_channel, filter_zp);
} }

View File

@ -25,11 +25,11 @@ extern "C" {
void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
const int *input_sum, const int *bias); const int *input_sum, const int *bias);
void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums, void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums,
const int *bias, int act_min, int act_max, int out_zp, int *multiplier, int *left_shift, const int *bias, int act_min, int act_max, int out_zp, const int *multiplier,
int *right_shift, int row, int col, int stride, size_t peroc); const int *left_shift, const int *right_shift, int row, int col, int stride, size_t peroc);
void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, const int *multiplier,
int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); const int *left_shift, const int *right_shift, size_t stride, size_t peroc, const int *filter_zp);
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias, void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
size_t ksize, size_t ic4, size_t output_channel, size_t offset, size_t ksize, size_t ic4, size_t output_channel, size_t offset,
@ -40,13 +40,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
const int *input_sum, const int *bias); const int *input_sum, const int *bias);
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel); int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, size_t stride, const int32_t *input_sum, const int32_t *bias,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t maxi, size_t per_channel, int32_t *filter_zp); int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -85,7 +85,7 @@ int PadInt8CPUKernel::SetQuantParam() {
int PadInt8CPUKernel::InitPadParam() { int PadInt8CPUKernel::InitPadParam() {
auto in_dims = in_tensors_.at(0)->shape(); auto in_dims = in_tensors_.at(0)->shape();
auto out_dims = out_tensors_.at(0)->shape(); auto out_dims = out_tensors_.at(0)->shape();
int ndims = in_dims.size(); int ndims = static_cast<size_t>(in_dims.size());
int in[] = {1, 1, 1, 1}; int in[] = {1, 1, 1, 1};
int out[] = {1, 1, 1, 1}; int out[] = {1, 1, 1, 1};
@ -267,7 +267,8 @@ int PadInt8CPUKernel::Run() {
int error_code; int error_code;
if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) { if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t)); memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0],
static_cast<size_t>(out_tensors_[0]->ElementsNum()) * sizeof(int8_t));
error_code = ParallelLaunch(this->ms_context_, PadInt8Impl, this, op_parameter_->thread_num_); error_code = ParallelLaunch(this->ms_context_, PadInt8Impl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) { if (error_code != RET_OK) {
MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";

View File

@ -93,7 +93,7 @@ class ReduceInt8CPUKernel : public ReduceBaseCPUKernel {
bool valid_shape_ = false; bool valid_shape_ = false;
bool pattern_impl_ = false; bool pattern_impl_ = false;
Four_DIMENSION_REDUCE_TEMPLATE pattern_; Four_DIMENSION_REDUCE_TEMPLATE pattern_;
QuantMulArg reduce_mean_quant_param_; // used in reduce mean 4D situation QuantMulArg reduce_mean_quant_param_ = {}; // used in reduce mean 4D situation
Reducer reducer_ = nullptr; Reducer reducer_ = nullptr;
LastReducer last_reducer_ = nullptr; LastReducer last_reducer_ = nullptr;
std::vector<QuantMulArg *> mean_multipliers_; std::vector<QuantMulArg *> mean_multipliers_;

View File

@ -37,7 +37,7 @@ class ReluXInt8CPUKernel : public InnerKernel {
int Run() override; int Run() override;
int DoActivation(int task_id); int DoActivation(int task_id);
ReluXQuantArg quant_arg_; ReluXQuantArg quant_arg_ = {};
private: private:
int type_{0}; int type_{0};

View File

@ -63,18 +63,14 @@ int ReshapeInt8CPUKernel::Run() {
int ReshapeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int ReshapeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto reshape = reinterpret_cast<ReshapeInt8CPUKernel *>(cdata); auto reshape = reinterpret_cast<ReshapeInt8CPUKernel *>(cdata);
auto ret = reshape->DoExecute(task_id); reshape->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Reshapeint8Run task_id " << task_id << " failed.";
return ret;
}
return lite::RET_OK; return lite::RET_OK;
} }
int ReshapeInt8CPUKernel::DoExecute(int task_id) { void ReshapeInt8CPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) { if (real_dst_count <= 0) {
return lite::RET_OK; return;
} }
MS_ASSERT(input_data_); MS_ASSERT(input_data_);
MS_ASSERT(output_data_); MS_ASSERT(output_data_);
@ -82,7 +78,7 @@ int ReshapeInt8CPUKernel::DoExecute(int task_id) {
int8_t *cur_output_data = output_data_ + task_id * count_unit_; int8_t *cur_output_data = output_data_ + task_id * count_unit_;
Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_); Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_);
return lite::RET_OK; return;
} }
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reshape, LiteKernelCreator<ReshapeInt8CPUKernel>) REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reshape, LiteKernelCreator<ReshapeInt8CPUKernel>)

View File

@ -37,7 +37,7 @@ class ReshapeInt8CPUKernel : public InnerKernel {
int Init() override; int Init() override;
int ReSize() override; int ReSize() override;
int Run() override; int Run() override;
int DoExecute(int task_id); void DoExecute(int task_id);
private: private:
int64_t elements_num_ = 0; int64_t elements_num_ = 0;

View File

@ -37,20 +37,32 @@ constexpr unsigned int OFFSET_BASE = 10;
} // namespace } // namespace
void ResizeInt8CPUKernel::FreeResizeBiLinear() { void ResizeInt8CPUKernel::FreeResizeBiLinear() {
free(resize_quant_arg_.x_axis_index_); free(resize_quant_arg_.x_axis_index_);
resize_quant_arg_.x_axis_index_ = nullptr;
free(resize_quant_arg_.x_axis_lower_); free(resize_quant_arg_.x_axis_lower_);
resize_quant_arg_.x_axis_lower_ = nullptr;
free(resize_quant_arg_.x_axis_upper_); free(resize_quant_arg_.x_axis_upper_);
resize_quant_arg_.x_axis_upper_ = nullptr;
free(resize_quant_arg_.y_axis_index_); free(resize_quant_arg_.y_axis_index_);
resize_quant_arg_.y_axis_index_ = nullptr;
free(resize_quant_arg_.y_axis_lower_); free(resize_quant_arg_.y_axis_lower_);
resize_quant_arg_.y_axis_lower_ = nullptr;
free(resize_quant_arg_.y_axis_upper_); free(resize_quant_arg_.y_axis_upper_);
resize_quant_arg_.y_axis_upper_ = nullptr;
} }
void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() { void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() {
free(resize_float_quant_arg_.x_axis_index_); free(resize_float_quant_arg_.x_axis_index_);
resize_float_quant_arg_.x_axis_index_ = nullptr;
free(resize_float_quant_arg_.x_axis_lower_); free(resize_float_quant_arg_.x_axis_lower_);
resize_float_quant_arg_.x_axis_lower_ = nullptr;
free(resize_float_quant_arg_.x_axis_upper_); free(resize_float_quant_arg_.x_axis_upper_);
resize_float_quant_arg_.x_axis_upper_ = nullptr;
free(resize_float_quant_arg_.y_axis_index_); free(resize_float_quant_arg_.y_axis_index_);
resize_float_quant_arg_.y_axis_index_ = nullptr;
free(resize_float_quant_arg_.y_axis_lower_); free(resize_float_quant_arg_.y_axis_lower_);
resize_float_quant_arg_.y_axis_lower_ = nullptr;
free(resize_float_quant_arg_.y_axis_upper_); free(resize_float_quant_arg_.y_axis_upper_);
resize_float_quant_arg_.y_axis_upper_ = nullptr;
} }
ResizeInt8CPUKernel::~ResizeInt8CPUKernel() { ResizeInt8CPUKernel::~ResizeInt8CPUKernel() {

View File

@ -52,8 +52,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
QuantArg *quant_in_{nullptr}; QuantArg *quant_in_{nullptr};
QuantArg *quant_out_{nullptr}; QuantArg *quant_out_{nullptr};
QuantMulArg *multiplier_{nullptr}; QuantMulArg *multiplier_{nullptr};
ResizeQuantArg resize_quant_arg_; ResizeQuantArg resize_quant_arg_ = {};
ResizeFloatScaleQuantArg resize_float_quant_arg_; ResizeFloatScaleQuantArg resize_float_quant_arg_ = {};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -64,7 +64,7 @@ int SqueezeInt8CPUKernel::Init() {
auto quant_params = output_tensor->quant_params(); auto quant_params = output_tensor->quant_params();
MS_ASSERT(quant_params.size() == 1); MS_ASSERT(quant_params.size() == 1);
quant_squeeze_param_->out_quant_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg))); quant_squeeze_param_->out_quant_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
if (quant_squeeze_param_->in_quant_args_ == nullptr) { if (quant_squeeze_param_->out_quant_args_ == nullptr) {
MS_LOG(ERROR) << "malloc QuantArg failed"; MS_LOG(ERROR) << "malloc QuantArg failed";
if (quant_squeeze_param_ != nullptr) { if (quant_squeeze_param_ != nullptr) {
if (quant_squeeze_param_->in_quant_args_ != nullptr) { if (quant_squeeze_param_->in_quant_args_ != nullptr) {
@ -97,15 +97,11 @@ int SqueezeInt8CPUKernel::Run() {
int SqueezeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int SqueezeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto Squeeze = reinterpret_cast<SqueezeInt8CPUKernel *>(cdata); auto Squeeze = reinterpret_cast<SqueezeInt8CPUKernel *>(cdata);
auto ret = Squeeze->DoExecute(task_id); Squeeze->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SqueezeInt8Run task_id " << task_id << " failed.";
return ret;
}
return RET_OK; return RET_OK;
} }
int SqueezeInt8CPUKernel::DoExecute(int task_id) { void SqueezeInt8CPUKernel::DoExecute(int task_id) {
auto input_tensor = in_tensors_.at(kInputIndex); auto input_tensor = in_tensors_.at(kInputIndex);
MS_ASSERT(input_tensor); MS_ASSERT(input_tensor);
auto out_tensor = out_tensors_.at(kOutputIndex); auto out_tensor = out_tensors_.at(kOutputIndex);
@ -117,7 +113,6 @@ int SqueezeInt8CPUKernel::DoExecute(int task_id) {
int num = input_tensor->ElementsNum(); int num = input_tensor->ElementsNum();
SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_); SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_);
return RET_OK;
} }
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeInt8CPUKernel>) REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeInt8CPUKernel>)

View File

@ -36,7 +36,7 @@ class SqueezeInt8CPUKernel : public InnerKernel {
int Init() override; int Init() override;
int ReSize() override; int ReSize() override;
int Run() override; int Run() override;
int DoExecute(int tId); void DoExecute(int tId);
private: private:
SqueezeQuantArg *quant_squeeze_param_{nullptr}; SqueezeQuantArg *quant_squeeze_param_{nullptr};

View File

@ -46,7 +46,7 @@ class TanhInt8CPUKernel : public InnerKernel {
int element_size_{0}; int element_size_{0};
int thread_count_{0}; int thread_count_{0};
int thread_stride_{0}; int thread_stride_{0};
TanhQuantParameter tanh_quant_; TanhQuantParameter tanh_quant_ = {};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -79,7 +79,7 @@ int TransposeInt8CPUKernel::DoTranspose(int task_id) {
return RET_OK; return RET_OK;
} }
void TransposeInt8CPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, void TransposeInt8CPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
const TransposeParameter *param) { const TransposeParameter *param) {
auto out_shape = out_tensor->shape(); auto out_shape = out_tensor->shape();
if (in_tensor->shape().size() == DIMENSION_4D && param->perm_[0] == 0 && param->perm_[1] == 2 && if (in_tensor->shape().size() == DIMENSION_4D && param->perm_[0] == 0 && param->perm_[1] == 2 &&

View File

@ -44,7 +44,8 @@ class TransposeInt8CPUKernel : public InnerKernel {
int DoTranspose(int task_id); int DoTranspose(int task_id);
private: private:
void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, const TransposeParameter *param); void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
const TransposeParameter *param);
TransposeParameter *transpose_param_; TransposeParameter *transpose_param_;
TransposeFunc NHNCTransposeFunc_ = nullptr; TransposeFunc NHNCTransposeFunc_ = nullptr;
int8_t *in_ptr_ = nullptr; int8_t *in_ptr_ = nullptr;