!21462 fix mixing of signed and unsigned values

Merge pull request !21462 from zhaozhenlong/lite/issue/codex_0803
This commit is contained in:
i-robot 2021-08-09 06:09:28 +00:00 committed by Gitee
commit 3acfeac239
61 changed files with 255 additions and 240 deletions

View File

@ -5,7 +5,8 @@
//void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
// const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
// int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, int peroc);
// const int *multiplier, const int *left_shift, const int *right_shift, int row,
// int col, int stride, int peroc);
// x0: a(left matrix ptr)
// x1: b(right matrix ptr)

View File

@ -4,8 +4,9 @@
.align 5
//void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums,
// const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift,
// int32_t *right_shift, size_t stride, size_t filter_peroc, int32_t *filter_zp)
// const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier,
// const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc,
// const int32_t *filter_zp)
// x0: a(left matrix ptr)
// x1: b(right matrix ptr)

View File

@ -39,8 +39,8 @@ void DoArgMinMaxQuant(const int8_t *input, int8_t *output, const ArgMinMaxParame
float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
int32_t output_zp = out_quant_arg->zp_;
for (int i = 0; i < pre_axis_count; ++i) {
size_t output_offset = i * after_axis_count;
size_t input_offset = output_offset * axis_count;
int output_offset = i * after_axis_count;
int input_offset = output_offset * axis_count;
for (int j = 0; j < after_axis_count; ++j) {
float value = -FLT_MAX;
if (!param->get_max_) {
@ -97,8 +97,8 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
int32_t output_zp = out_quant_arg->zp_;
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
for (int j = 0; j < in_shape[0]; ++j) {
size_t offset = param->in_strides_[0] * j + i;
param->arg_elements_[j].index_ = j;
int offset = param->in_strides_[0] * j + i;
param->arg_elements_[j].index_ = (uint32_t)j;
param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
@ -108,7 +108,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
}
for (int j = 0; j < param->topk_; ++j) {
size_t out_offset = j * param->out_strides_[0] + i;
int out_offset = j * param->out_strides_[0] + i;
float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
@ -123,12 +123,12 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
int32_t output_zp = out_quant_arg->zp_;
int in_shape1 = in_shape[1];
for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0];
int in_dim0_offset = i * param->in_strides_[0];
int out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < param->in_strides_[1]; ++j) {
for (int k = 0; k < in_shape1; ++k) {
size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
param->arg_elements_[k].index_ = k;
int offset = param->in_strides_[1] * k + in_dim0_offset + j;
param->arg_elements_[k].index_ = (size_t)k;
param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
@ -138,7 +138,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
}
for (int k = 0; k < param->topk_; ++k) {
size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
int out_offset = out_dim0_offset + j + k * param->out_strides_[1];
float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
@ -155,15 +155,15 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
int in_shape1 = in_shape[1];
int in_shape2 = in_shape[2];
for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0];
int in_dim0_offset = i * param->in_strides_[0];
int out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < in_shape1; ++j) {
size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
for (int k = 0; k < param->in_strides_[2]; ++k) {
for (int l = 0; l < in_shape2; ++l) {
size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
param->arg_elements_[l].index_ = l;
int offset = param->in_strides_[2] * l + k + in_dim1_offset;
param->arg_elements_[l].index_ = (uint32_t)l;
param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
@ -172,7 +172,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8);
}
for (int l = 0; l < param->topk_; ++l) {
size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
int out_offset = out_dim1_offset + k + l * param->out_strides_[2];
float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}
@ -191,17 +191,17 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape,
int in_shape2 = in_shape[2];
int in_shape3 = in_shape[3];
for (int i = 0; i < in_shape[0]; ++i) {
size_t in_dim0_offset = i * param->in_strides_[0];
size_t out_dim0_offset = i * param->out_strides_[0];
int in_dim0_offset = i * param->in_strides_[0];
int out_dim0_offset = i * param->out_strides_[0];
for (int j = 0; j < in_shape1; ++j) {
size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
for (int k = 0; k < in_shape2; ++k) {
size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
for (int l = 0; l < in_shape3; ++l) {
size_t offset = l + in_dim2_offset;
param->arg_elements_[l].index_ = l;
int offset = l + in_dim2_offset;
param->arg_elements_[l].index_ = (uint32_t)l;
param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
}
if (param->get_max_) {
@ -210,7 +210,7 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape,
qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8);
}
for (int l = 0; l < param->topk_; ++l) {
size_t out_offset = out_dim2_offset + l;
int out_offset = out_dim2_offset + l;
float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
}

View File

@ -218,7 +218,7 @@ int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec,
void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para,
int *index) {
int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)para.shift_left_);
for (; (*index) <= element_size - 8; (*index) += 8) {
int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_);

View File

@ -812,11 +812,11 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input,
for (int j = real_y_start; j < real_y_end; j++) {
const int16_t *src = input_data + src_c8_offset + C8NUM * (j * input_width + real_x_start);
int16_t *dst = tmp_data + C8NUM * (C4NUM * j + real_x_start);
memcpy(dst, src, (real_x_end - real_x_start) * C8NUM * sizeof(int16_t));
memcpy(dst, src, (size_t)(real_x_end - real_x_start) * C8NUM * sizeof(int16_t));
}
// input transform
int dst_ic8_offset = dst_plane_offset + ic * TILE_NUM * C8NUM;
size_t dst_step = ic8 * C8NUM * TILE_NUM;
size_t dst_step = (size_t)ic8 * C8NUM * TILE_NUM;
int16_t *trans_input_ptr = trans_input + dst_ic8_offset;
Conv3x3Int8InputUnit(tmp_data, trans_input_ptr, dst_step, input_zp);
}
@ -826,7 +826,7 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input,
void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) {
int oc4 = UP_DIV(oc, C4NUM);
#ifdef ENABLE_ARM
IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t));
IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, (size_t)oc4 * 4 * 16 * sizeof(int32_t));
#else
const int input_unit_square = 16;
for (int c = 0; c < oc4; c++) {

View File

@ -20,9 +20,9 @@
int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
const ConvParameter *conv_param) {
/* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */
size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
int input_plane = conv_param->input_w_ * conv_param->input_h_;
int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
int output_plane = conv_param->output_w_ * conv_param->output_h_;
int oc4 = UP_DIV(output_channel, C4NUM);
int in_plane4 = UP_ROUND(input_plane, C4NUM);
@ -38,7 +38,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
for (int c = 0; c < oc4; c++) {
int32_t *dst_ptr = tmp + c * output_plane * C4NUM;
const int32_t *src_ptr = src + c * in_plane4 * kernel_plane * C4NUM;
memset(dst_ptr, 0, output_plane * C4NUM * sizeof(int32_t));
memset(dst_ptr, 0, (size_t)output_plane * C4NUM * sizeof(int32_t));
for (int ih = 0; ih < conv_param->input_h_; ih++) {
for (int iw = 0; iw < conv_param->input_w_; iw++) {
@ -81,7 +81,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
} /*ih*/
} /*oc*/
PostFuncInt8C4(tmp, bias, out, output_channel, output_plane, conv_param->output_channel_,
PostFuncInt8C4(tmp, bias, out, output_channel, (size_t)output_plane, conv_param->output_channel_,
conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0],
conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]);

View File

@ -39,7 +39,7 @@ int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg)
if (arg->relu6_multiplier_exponent < 0) {
relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent);
}
relu6_value = (relu6_value + (1 << 15)) >> 1;
relu6_value = (size_t)(relu6_value + (1 << 15)) >> 1;
const int16_t preshift_output_value =
SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale);

View File

@ -104,7 +104,7 @@ void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
for (int ri = 0; ri < row_4div; ri += C4NUM) {
for (int ci = 0; ci < col_16div; ci += C16NUM) {
size_t col_offset = col;
size_t col_offset = (size_t)col;
int8_t *src_c = src_r + ci;
int8_t *dst_c = dst_r + ci * C4NUM;
#ifdef ENABLE_ARM64
@ -207,7 +207,7 @@ void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
int c2div = c / C2NUM, c2mod = c % C2NUM;
size_t ci = r * stride + c;
int32_t value = 0;
for (int d = 0; d < deep_16; d++) {
for (int d = 0; d < (int)deep_16; d++) {
int d16div = d / C16NUM, d16mod = d % C16NUM;
size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod;
size_t bi = c2div * deep_16 * C2NUM + d16div * C2NUM * C16NUM + c2mod * C16NUM + d16mod;
@ -269,9 +269,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c
#endif
void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
size_t per_channel) {
size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel) {
/* row8x4-major * row4x8-major => (int8)row-major */
for (int r = 0; r < row; r++) {
for (int c = 0; c < col; c++) {
@ -279,7 +279,7 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
int c8div = c / C8NUM, c8mod = c % C8NUM;
size_t ci = r * stride + c;
int32_t value = 0;
for (int d = 0; d < deep_4; d++) {
for (int d = 0; d < (int)deep_4; d++) {
int d4div = d / C4NUM, d4mod = d % C4NUM;
size_t ai = r8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + r8mod * C4NUM + d4mod;
size_t bi = c8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + c8mod * C4NUM + d4mod;
@ -302,9 +302,9 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
}
void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
size_t per_channel, int32_t *filter_zp) {
size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, const int32_t *filter_zp) {
/* row4x4-major * row4x16-major => (int8)row-major */
for (int r = 0; r < row; r++) {
for (int c = 0; c < col; c++) {
@ -312,7 +312,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row
int c16div = c / C16NUM, c16mod = c % C16NUM;
size_t ci = r * stride + c;
int32_t value = 0;
for (int d = 0; d < deep_4; d++) {
for (int d = 0; d < (int)deep_4; d++) {
int d4div = d / C4NUM, d4mod = d % C4NUM;
size_t ai = r4div * deep_4 * C4NUM + d4div * C4NUM * C4NUM + r4mod * C4NUM + d4mod;
size_t bi = c16div * deep_4 * C16NUM + d4div * C16NUM * C4NUM + c16mod * C4NUM + d4mod;
@ -453,7 +453,7 @@ void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input,
#else
int32_t tmp_sum_value[4] = {0};
for (int ici = 0; ici < ic_4div; ici += C4NUM) {
for (int i = 0; i < C4NUM; i++) {
for (size_t i = 0; i < C4NUM; i++) {
tmp_sum_value[i] += src_ic[0 + i * input_channel];
tmp_sum_value[i] += src_ic[1 + i * input_channel];
tmp_sum_value[i] += src_ic[2 + i * input_channel];

View File

@ -42,9 +42,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c
/* optimize conv */
void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
size_t per_channel);
size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel);
/* 4x16 16x2 -> 4x2 */
/* arm32 conv1x1 */
@ -61,9 +61,9 @@ void RowMajor2Row4x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum,
size_t input_channel, size_t plane_size, int32_t filter_zp);
void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
size_t per_channel, int32_t *filter_zp);
size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, const int32_t *filter_zp);
#ifdef ENABLE_ARM64
void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums,

View File

@ -27,10 +27,10 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i
return vqmovn_s32(raw_sum);
}
void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
MulQuantArg *quant_arg, int *index) {
void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
const MulQuantArg *quant_arg, int *index) {
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_);
int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_);
@ -104,8 +104,8 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data,
}
#endif
void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count,
bool input1_broad, MulQuantArg *quant_arg) {
void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth,
int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg) {
// input0 need broadcast
int32_t zp1 = quant_arg->in_quant_args_[0].zp_;
int32_t zp2 = quant_arg->in_quant_args_[1].zp_;
@ -215,8 +215,8 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int
return;
}
void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
MulQuantArg *quant_arg) {
void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
const MulQuantArg *quant_arg) {
int index = 0;
#ifdef ENABLE_NEON
MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index);

View File

@ -28,9 +28,10 @@
#ifdef __cplusplus
extern "C" {
#endif
void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg);
void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count,
bool input1_broad, MulQuantArg *quant_arg);
void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
const MulQuantArg *quant_arg);
void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth,
int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg);
#ifdef __cplusplus
}
#endif

View File

@ -849,7 +849,8 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara
}
}
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) {
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data,
const ConvParameter *conv_param) {
// origin weight format : ohwi
int input_channel = conv_param->input_channel_;
int ic8 = input_channel / C8NUM * C8NUM;

View File

@ -40,7 +40,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, const int32_t
const ConvParameter *conv_param);
void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16);
void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param);
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param);
void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, const ConvParameter *conv_param);
void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num,
int block_index, const int32_t *filter_zp, int32_t *input_sum,
const ConvParameter *conv_param, bool per_channel, bool is_optimize);

View File

@ -26,7 +26,7 @@ int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dim
for (int w = 0; w < in_dims[2]; w++) {
const int8_t *in = in_data + Offset(in_dims, n, h, w, 0);
int8_t *out = out_data + Offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
memcpy(out, in, copy_size * sizeof(int8_t));
memcpy(out, in, (size_t)copy_size * sizeof(int8_t));
}
}
}

View File

@ -112,7 +112,7 @@ int UInt8ToInt8(const uint8_t *real_values, int8_t *quant_values, int size) {
}
for (int i = 0; i < size; ++i) {
int temp = real_values[i] - 128;
int temp = (int)real_values[i] - 128;
if (temp > 127) {
quant_values[i] = 127;
} else if (temp < -128) {

View File

@ -34,8 +34,8 @@ int16x4_t ClacSumHalfWordMul3(int32x4_t scaled_input0, int32x4_t scaled_input1,
const ScaleParameter *scale_param) {
int32x4_t output_multiplier_vec = vdupq_n_s32(scale_param->scale_mul_arg_.multiplier_);
int32x4_t output_multiplier_vec2 = vdupq_n_s32(scale_param->offset_mul_arg_.multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << scale_param->scale_mul_arg_.left_shift_);
int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << scale_param->offset_mul_arg_.left_shift_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)(scale_param->scale_mul_arg_.left_shift_));
int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << (size_t)(scale_param->offset_mul_arg_.left_shift_));
int32x4_t input_scale = vmulq_s32(scaled_input0, scaled_input1);
int32x4_t raw_sum = RoundingDivideByPOTInt32x4(
SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec),

View File

@ -24,7 +24,7 @@
#ifdef ENABLE_NEON
int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec,
int32x4_t output_multiplier_vec, SubQuantArg *para) {
int32x4_t output_multiplier_vec, const SubQuantArg *para) {
int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1);
raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec),
@ -35,14 +35,14 @@ int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, in
return vqmovn_s32(raw_data);
}
void SubInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
SubQuantArg *para, int *index) {
void SubInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
const SubQuantArg *para, int *index) {
int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_);
int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_);
int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_);
int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_);
int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32((1 << para->left_shift_out_));
int32x4_t left_shift_out_vec = vdupq_n_s32((1 << (size_t)para->left_shift_out_));
int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_);
int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_);

View File

@ -226,16 +226,16 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu
const int *strides = transpose_param->strides_;
const int *out_strides = transpose_param->out_strides_;
int num_axes = transpose_param->num_axes_;
size_t data_size = (*out_strides) * output_shape[0];
size_t data_size = (size_t)((*out_strides) * output_shape[0]);
size_t offset_size = UP_DIV(data_size, thread_num);
size_t task_offset = offset_size * task_id;
int count = data_size - task_offset;
if (count <= 0) {
size_t count = data_size - task_offset;
if (data_size < task_offset) {
return;
}
count = MSMIN(offset_size, count);
for (size_t idx = task_offset; idx < task_offset + count; ++idx) {
int pos = idx;
int pos = (int)idx;
int output_idx = 0;
int input_idx = 0;
for (int i = 0; i < num_axes; ++i) {

View File

@ -24,7 +24,7 @@ int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParamete
float input_scale = para_->quant_arg.in_quant_args_.scale_;
int8_t input_zp = para_->quant_arg.in_quant_args_.zp_;
for (int i = task_id; i < data_size; i += para_->thread_count_) {
for (int i = task_id; i < (int)data_size; i += para_->thread_count_) {
output_ptr[i] = output_zp + round(1 / output_scale * input_scale * (input_ptr[i] - input_zp));
}
return 0;

View File

@ -23,14 +23,15 @@ typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int *dst, i
const int *input_sum, const int *bias);
typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel);
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, int *filter_zp);
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int *filter_zp);
typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } OutType;

View File

@ -165,7 +165,7 @@ int Conv2DINT8Coder::InitWeightBias(CoderContext *const context) {
}
int Conv2DINT8Coder::Prepare(CoderContext *const context) {
Conv2DBaseCoder::Init();
MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed.");
CheckSupportOptimize();
MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed!");
MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed.");

View File

@ -24,7 +24,7 @@
namespace mindspore::lite::micro {
int ConvolutionDepthwiseINT8Coder::Prepare(CoderContext *const context) {
Conv2DBaseCoder::Init();
MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed.");
// init sliding window param
MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed.");
MS_CHECK_RET_CODE(InitWeightBias(context), "dwconvolution do init weightbais failed");

View File

@ -69,7 +69,7 @@ int ReduceInt8Coder::CalculateQuantArgs() {
QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
qm->left_shift_ = shift < 0 ? -shift : 0;
qm->right_shift_ = shift > 0 ? shift : 0;
mean_multipliers_.push_back(qm);
prod_multipliers_.push_back(qm);
}
}

View File

@ -30,7 +30,21 @@ class ReduceInt8Coder final : public ReduceBaseCoder {
const Model::Node *node, size_t node_index, Target target)
: ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
~ReduceInt8Coder() override { begin_src_data_ = nullptr; }
~ReduceInt8Coder() override {
begin_src_data_ = nullptr;
for (auto &arg : mean_multipliers_) {
delete arg;
arg = nullptr;
}
for (auto &arg : prod_multipliers_) {
delete arg;
arg = nullptr;
}
for (auto &arg : sum_square_multipliers_) {
delete arg;
arg = nullptr;
}
}
int Prepare(CoderContext *const context) override;
int DoCode(CoderContext *const context) override;

View File

@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_Softmax;
namespace mindspore::lite::micro::nnacl {
int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
SoftmaxBaseCoder::Init();
MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "Softmax base init failed.");
std::vector<LiteQuantParam> in_quant_args = input_tensor_->quant_params();
quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale;
quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint;
@ -59,8 +59,7 @@ int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
sum_data_size_ = inner_size * sizeof(int);
sum_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace));
MS_CHECK_PTR(sum_data_);
ReSize();
return RET_OK;
return ReSize();
}
int SoftMaxInt8Coder::DoCode(CoderContext *const context) {

View File

@ -20,11 +20,12 @@ extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, in
const int *input_sum, const int *bias);
extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
size_t peroc);
const int *multiplier, const int *left_shift, const int *right_shift, int row, int col,
int stride, size_t peroc);
extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
const int *multiplier, const int *left_shift, const int *right_shift, size_t stride,
size_t peroc, const int *filter_zp);
#ifdef ENABLE_ARM64
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
@ -33,16 +34,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
}
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel) {
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) {
return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
}
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, int32_t *filter_zp) {
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp) {
return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
right_shift, stride, per_channel, filter_zp);
}

View File

@ -29,13 +29,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
const int *input_sum, const int *bias);
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel);
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, int32_t *filter_zp);
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp);
#endif
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_

View File

@ -35,7 +35,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
memset(packed_weight_, 0, size);
RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel);
/* bias */
size = UP_ROUND(output_channel, C2NUM);
size = (size_t)UP_ROUND(output_channel, C2NUM);
int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t));
if (bias_data_ == NULL) {
free(packed_weight_);
@ -43,7 +43,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
}
memset(bias_data_, 0, size * sizeof(int32_t));
if (src_bias != NULL) {
memcpy(bias_data_, src_bias, output_channel * sizeof(int32_t));
memcpy(bias_data_, src_bias, (size_t)output_channel * sizeof(int32_t));
}
#else
/* InitWeightBias */

View File

@ -42,7 +42,7 @@ class ArithmeticInt8CPUKernel : public InnerKernel {
int8_t *tile_data0_{nullptr};
int8_t *tile_data1_{nullptr};
ArithmeticRunInt8 arithmetic_run_{nullptr};
ArithmeticQuantArg quant_args_;
ArithmeticQuantArg quant_args_ = {};
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_

View File

@ -48,12 +48,12 @@ int BatchnormInt8CPUKernel::InitConstTensor() {
auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());
alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
alpha_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(mean->ElementsNum()) * sizeof(float)));
if (alpha_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
beta_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(variance->ElementsNum()) * sizeof(float)));
if (beta_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@ -92,12 +92,12 @@ int BatchnormInt8CPUKernel::InitFusedConstTensor() {
auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());
alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
alpha_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(mean->ElementsNum()) * sizeof(float)));
if (alpha_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
beta_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(variance->ElementsNum()) * sizeof(float)));
if (beta_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;

View File

@ -59,11 +59,12 @@ int ConcatInt8CPUKernel::Init() {
}
int ConcatInt8CPUKernel::ReSize() {
concat_param_->axis_ =
concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_;
concat_param_->axis_ = concat_param_->axis_ >= 0
? concat_param_->axis_
: static_cast<int>(in_tensors_.front()->shape().size()) + concat_param_->axis_;
auto input_num = in_tensors_.size();
concat_param_->input_num_ = input_num;
concat_param_->input_num_ = static_cast<int>(input_num);
concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num));
if (concat_param_->input_shapes_ == nullptr) {
MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed.";
@ -97,7 +98,7 @@ int ConcatInt8CPUKernel::ReSize() {
memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(),
sizeof(int) * output_dim);
for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) {
for (size_t i = static_cast<size_t>(concat_param_->axis_ + 1); i < output_dim; i++) {
after_axis_size *= concat_param_->output_shapes_[i];
}
concat_param_->after_axis_size = after_axis_size;
@ -122,21 +123,17 @@ int ConcatInt8CPUKernel::Run() {
int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto concat = reinterpret_cast<ConcatInt8CPUKernel *>(cdata);
auto ret = concat->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConcatInt8Run task_id " << task_id << " failed.";
return ret;
}
concat->DoExecute(task_id);
return lite::RET_OK;
}
int ConcatInt8CPUKernel::DoExecute(int task_id) {
void ConcatInt8CPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) {
return lite::RET_OK;
return;
}
Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id);
return lite::RET_OK;
return;
}
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator<ConcatInt8CPUKernel>)

View File

@ -57,7 +57,7 @@ class ConcatInt8CPUKernel : public InnerKernel {
int Init() override;
int ReSize() override;
int Run() override;
int DoExecute(int task_id);
void DoExecute(int task_id);
private:
int64_t before_axis_size = 0;

View File

@ -25,7 +25,7 @@ namespace mindspore::kernel {
namespace {
constexpr size_t kUnitBufferMultipler = 4 * 4;
} // namespace
int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) {
int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param) {
auto input_channel = conv_param->input_channel_;
auto output_channel = conv_param->output_channel_;
auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
@ -116,7 +116,7 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() {
memset(bias_data_, 0, new_bias_size);
if (in_tensors_.size() == kInputSize2) {
auto ori_bias_addr = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->MutableData());
memcpy(bias_data_, ori_bias_addr, output_channel * sizeof(int32_t));
memcpy(bias_data_, ori_bias_addr, static_cast<size_t>(output_channel) * sizeof(int32_t));
} else {
MS_ASSERT(in_tensors_.size() == kInputSize1);
}

View File

@ -46,7 +46,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
int32_t *tmp_dst_buffer_ = nullptr;
int8_t *tmp_out_ = nullptr;
};
int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param);
int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_

View File

@ -60,13 +60,13 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(),
weight_tensor->Batch());
packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
free(tmp_weight);
return RET_ERROR;
}
bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
if (filter_per_channel) {
for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
for (int c = 0; c < channel; c++) {
@ -87,16 +87,16 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
}
free(tmp_weight);
bias_data_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
memset(bias_data_, 0, channel * sizeof(int32_t));
memset(bias_data_, 0, static_cast<size_t>(channel) * sizeof(int32_t));
if (in_tensors_.size() == kInputSize2) {
auto bias_tensor = in_tensors_.at(kBiasIndex);
auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t));
memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
}
return RET_OK;
}
@ -153,7 +153,8 @@ int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() {
int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_;
buffer_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(buffer_size * sizeof(int8_t)));
buffer_ =
reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(buffer_size) * sizeof(int8_t)));
if (buffer_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;

View File

@ -55,7 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
return RET_ERROR;
}
bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
if (filter_per_channel) {
for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
for (int c = 0; c < channel; c++) {

View File

@ -42,7 +42,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->MutableData());
int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@ -50,16 +50,16 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
weight_tensor->Batch(), &(conv_param_->conv_quant_arg_));
bias_data_ = reinterpret_cast<int32_t *>(malloc(C8NUM * OC8 * sizeof(int32_t)));
bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(C8NUM * OC8) * sizeof(int32_t)));
if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
memset(bias_data_, 0, C8NUM * OC8 * sizeof(int32_t));
memset(bias_data_, 0, static_cast<size_t>(C8NUM * OC8) * sizeof(int32_t));
if (in_tensors_.size() == kInputSize2) {
auto bias_tensor = in_tensors_.at(kBiasIndex);
auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t));
memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
}
conv_param_->thread_num_ = MSMIN(thread_count_, OC8);
@ -72,7 +72,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM *
UP_DIV(conv_param_->input_channel_, C8NUM);
packed_input_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_input_size * sizeof(int8_t)));
packed_input_ =
reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(pack_input_size) * sizeof(int8_t)));
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@ -80,7 +81,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM *
UP_DIV(conv_param_->output_channel_, C8NUM);
packed_output_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_output_size * sizeof(int8_t)));
packed_output_ = reinterpret_cast<int8_t *>(
ms_context_->allocator->Malloc(static_cast<size_t>(pack_output_size) * sizeof(int8_t)));
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
@ -150,10 +152,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
auto input_tensor = in_tensors_.at(kInputIndex);
auto channel = conv_param_->input_channel_;
input_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
input_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
MSLITE_CHECK_PTR(input_scale_);
input_zp_ = reinterpret_cast<int8_t *>(malloc(channel * sizeof(int8_t)));
input_zp_ = reinterpret_cast<int8_t *>(malloc(static_cast<size_t>(channel) * sizeof(int8_t)));
MSLITE_CHECK_PTR(input_zp_);
if (input_tensor->quant_params().size() == kPerTensor) {
@ -171,10 +173,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
}
auto output_tensor = out_tensors_.at(kOutputIndex);
output_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
output_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
MSLITE_CHECK_PTR(output_scale_);
output_zp_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
output_zp_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(output_zp_);
if (output_tensor->quant_params().size() == kPerTensor) {
@ -191,25 +193,26 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
}
}
conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(channel * sizeof(double)));
conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(static_cast<size_t>(channel) * sizeof(double)));
MSLITE_CHECK_PTR(conv_quant_arg_->real_multiplier_);
conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->left_shift_);
conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->right_shift_);
conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
conv_quant_arg_->quant_multiplier_ =
reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->quant_multiplier_);
conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->out_act_min_);
conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
MSLITE_CHECK_PTR(conv_quant_arg_->out_act_max_);
weight_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
weight_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
MSLITE_CHECK_PTR(weight_scale_);
auto weight_tensor = in_tensors_.at(kWeightIndex);

View File

@ -98,12 +98,12 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
memset(bias_data_, 0, bias_size);
if (in_tensors_.size() == kInputSize2) {
auto ori_bias = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->data_c());
memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t));
memcpy(bias_data_, ori_bias, static_cast<size_t>(output_channel) * sizeof(int32_t));
} else {
MS_ASSERT(in_tensors_.size() == kInputSize1);
}
auto *bias_data = reinterpret_cast<int32_t *>(bias_data_);
bool filter_peroc = conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL;
bool filter_peroc = static_cast<bool>(conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL);
if (filter_peroc) {
filter_zp_ptr_ = reinterpret_cast<int32_t *>(malloc(output_channel * sizeof(int32_t)));
if (filter_zp_ptr_ == nullptr) {
@ -126,9 +126,9 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
size_t input_sum_size;
if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
input_sum_size = up_round_oc * tile_num_ * thread_count_ * sizeof(int32_t);
input_sum_size = static_cast<size_t>(up_round_oc * tile_num_ * thread_count_) * sizeof(int32_t);
} else {
input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t);
input_sum_size = static_cast<size_t>(tile_num_ * thread_count_) * sizeof(int32_t);
}
input_sum_ = reinterpret_cast<int32_t *>(malloc(input_sum_size));
if (input_sum_ == nullptr) {

View File

@ -57,21 +57,16 @@ int CropInt8CPUKernel::Run() {
int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto crop = reinterpret_cast<CropInt8CPUKernel *>(cdata);
auto ret = crop->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "CropInt8Run task id " << task_id << " run failed.";
return ret;
}
crop->DoExecute(task_id);
return RET_OK;
}
int CropInt8CPUKernel::DoExecute(int task_id) {
void CropInt8CPUKernel::DoExecute(int task_id) {
auto input_tensor = in_tensors_.at(kInputIndex);
auto out_tensor = out_tensors_.at(kOutputIndex);
int8_t *input_data = reinterpret_cast<int8_t *>(input_tensor->data_c());
int8_t *output_data = reinterpret_cast<int8_t *>(out_tensor->data_c());
Int8Crop(input_data, output_data, task_id, crop_para_);
return RET_OK;
}
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, LiteKernelCreator<CropInt8CPUKernel>)

View File

@ -36,7 +36,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel {
int Init() override;
int ReSize() override;
int Run() override;
int DoExecute(int task_id);
void DoExecute(int task_id);
};
int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);

View File

@ -57,7 +57,7 @@ int GatherNdInt8CPUKernel::ReSize() {
auto indices_tensor = in_tensors_.at(1);
auto indices_shape = indices_tensor->shape();
int indices_rank = indices_shape.size();
int indices_rank = static_cast<size_t>(indices_shape.size());
count_ = 1;
for (int i = 0; i < indices_rank - 1; ++i) {
count_ *= indices_shape[i];
@ -66,12 +66,12 @@ int GatherNdInt8CPUKernel::ReSize() {
MS_LOG(ERROR) << "count_ is invalid, count_: " << count_;
return RET_ERROR;
}
in_offset_ = reinterpret_cast<int *>(malloc(count_ * sizeof(int)));
in_offset_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(count_) * sizeof(int)));
if (in_offset_ == nullptr) {
MS_LOG(ERROR) << "GatherNdInt8 Malloc in_offset_ error!";
return RET_ERROR;
}
(void)memset(in_offset_, 0, count_ * sizeof(int));
(void)memset(in_offset_, 0, static_cast<size_t>(count_) * sizeof(int));
thread_sz_count_ = MSMIN(thread_count_, count_);
if (thread_sz_count_ == 0) {
MS_LOG(ERROR) << "div zero";
@ -85,9 +85,9 @@ int GatherNdInt8CPUKernel::InitOffset() {
auto ind_quant_args = in_tensors_.at(1)->quant_params();
auto indices_tensor = in_tensors_.at(1);
auto indices_shape = indices_tensor->shape();
int indices_rank = indices_shape.size();
int indices_rank = static_cast<size_t>(indices_shape.size());
auto in_shape = in_tensors_.front()->shape();
int in_rank = in_shape.size();
int in_rank = static_cast<size_t>(in_shape.size());
if (indices_rank < 1) {
MS_LOG(ERROR) << "inex out of bounds";
return RET_ERROR;

View File

@ -44,7 +44,7 @@ class GatherNdInt8CPUKernel : public InnerKernel {
int *in_offset_ = nullptr;
int8_t *in_ptr_ = nullptr;
int8_t *out_ptr_ = nullptr;
GatherQuantArg param_;
GatherQuantArg param_ = {};
};
} // namespace mindspore::kernel

View File

@ -29,7 +29,7 @@ int GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) {
int8_t *src_ptr = reinterpret_cast<int8_t *>(ori_in_data_) + group_id * sub_in_channel;
int8_t *dst_ptr = sub_in_data;
for (int i = 0; i < in_plane; ++i) {
memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(int8_t));
memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_in_channel) * sizeof(int8_t));
src_ptr += ori_in_channel;
dst_ptr += sub_in_channel;
}
@ -45,7 +45,7 @@ int GroupConvolutionInt8CPUKernel::PostConcat(int group_id) {
int8_t *src_ptr = sub_out_data;
int8_t *dst_ptr = reinterpret_cast<int8_t *>(ori_out_data_) + group_id * sub_out_channel;
for (int i = 0; i < out_plane; ++i) {
memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(int8_t));
memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_out_channel) * sizeof(int8_t));
src_ptr += sub_out_channel;
dst_ptr += ori_out_channel;
}

View File

@ -37,7 +37,7 @@ class HswishInt8CPUKernel : public InnerKernel {
private:
int thread_count_;
HswishQuantArg quant_arg_;
HswishQuantArg quant_arg_ = {};
void MultiplierInt32ToInt16(int32_t input, int16_t *output) const;
};
} // namespace mindspore::kernel

View File

@ -39,7 +39,7 @@ class LeakyReluInt8CPUKernel : public InnerKernel {
int DoExecute(int task_id);
private:
LeakyReluQuantArg quant_prelu_parm_;
LeakyReluQuantArg quant_prelu_parm_ = {};
};
} // namespace mindspore::kernel

View File

@ -187,29 +187,21 @@ int MulInt8CPUKernel::Run() {
int FastHWBroadcastMulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata);
auto ret = mul->FastDoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "FastHWBroadcastMulInt8Run task_id " << task_id << " failed.";
return ret;
}
mul->FastDoExecute(task_id);
return lite::RET_OK;
}
int MulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata);
auto ret = mul->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "MulInt8Run task_id " << task_id << " failed.";
return ret;
}
mul->DoExecute(task_id);
return lite::RET_OK;
}
int MulInt8CPUKernel::FastDoExecute(int task_id) {
void MulInt8CPUKernel::FastDoExecute(int task_id) {
int depth = out_tensors_.front()->Channel();
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) {
return lite::RET_OK;
return;
}
int8_t *cur_input0_data = input0_data_;
int8_t *cur_input1_data = input1_data_ + task_id * count_unit_ * depth;
@ -219,20 +211,19 @@ int MulInt8CPUKernel::FastDoExecute(int task_id) {
cur_input1_data = input0_data_ + task_id * count_unit_ * depth;
}
FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_);
return RET_OK;
}
int MulInt8CPUKernel::DoExecute(int task_id) {
void MulInt8CPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) {
return lite::RET_OK;
return;
}
int8_t *cur_input0_data = input0_data_ + task_id * count_unit_;
int8_t *cur_input1_data = input1_data_ + task_id * count_unit_;
int8_t *cur_output_data = output_data_ + task_id * count_unit_;
Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_);
return lite::RET_OK;
return;
}
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MulFusion, LiteKernelCreator<MulInt8CPUKernel>)

View File

@ -39,8 +39,8 @@ class MulInt8CPUKernel : public InnerKernel {
void CheckSameShapeSize(std::vector<int> in_tensor0_shape, std::vector<int> in_tensor1_shape);
void CheckIfFastImpl();
int Run() override;
int DoExecute(int task_id);
int FastDoExecute(int task_id);
void DoExecute(int task_id);
void FastDoExecute(int task_id);
private:
const lite::InnerContext *ctx_ = nullptr;

View File

@ -30,16 +30,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
}
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel) {
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) {
return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
}
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, int32_t *filter_zp) {
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp) {
return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
right_shift, stride, per_channel, filter_zp);
}

View File

@ -25,11 +25,11 @@ extern "C" {
void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
const int *input_sum, const int *bias);
void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums,
const int *bias, int act_min, int act_max, int out_zp, int *multiplier, int *left_shift,
int *right_shift, int row, int col, int stride, size_t peroc);
const int *bias, int act_min, int act_max, int out_zp, const int *multiplier,
const int *left_shift, const int *right_shift, int row, int col, int stride, size_t peroc);
void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, const int *multiplier,
const int *left_shift, const int *right_shift, size_t stride, size_t peroc, const int *filter_zp);
#ifdef ENABLE_ARM64
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
size_t ksize, size_t ic4, size_t output_channel, size_t offset,
@ -40,13 +40,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
const int *input_sum, const int *bias);
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel);
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, int32_t *filter_zp);
size_t stride, const int32_t *input_sum, const int32_t *bias,
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
const int32_t *filter_zp);
#endif
#ifdef __cplusplus

View File

@ -85,7 +85,7 @@ int PadInt8CPUKernel::SetQuantParam() {
int PadInt8CPUKernel::InitPadParam() {
auto in_dims = in_tensors_.at(0)->shape();
auto out_dims = out_tensors_.at(0)->shape();
int ndims = in_dims.size();
int ndims = static_cast<size_t>(in_dims.size());
int in[] = {1, 1, 1, 1};
int out[] = {1, 1, 1, 1};
@ -267,7 +267,8 @@ int PadInt8CPUKernel::Run() {
int error_code;
if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0],
static_cast<size_t>(out_tensors_[0]->ElementsNum()) * sizeof(int8_t));
error_code = ParallelLaunch(this->ms_context_, PadInt8Impl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";

View File

@ -93,7 +93,7 @@ class ReduceInt8CPUKernel : public ReduceBaseCPUKernel {
bool valid_shape_ = false;
bool pattern_impl_ = false;
Four_DIMENSION_REDUCE_TEMPLATE pattern_;
QuantMulArg reduce_mean_quant_param_; // used in reduce mean 4D situation
QuantMulArg reduce_mean_quant_param_ = {}; // used in reduce mean 4D situation
Reducer reducer_ = nullptr;
LastReducer last_reducer_ = nullptr;
std::vector<QuantMulArg *> mean_multipliers_;

View File

@ -37,7 +37,7 @@ class ReluXInt8CPUKernel : public InnerKernel {
int Run() override;
int DoActivation(int task_id);
ReluXQuantArg quant_arg_;
ReluXQuantArg quant_arg_ = {};
private:
int type_{0};

View File

@ -63,18 +63,14 @@ int ReshapeInt8CPUKernel::Run() {
int ReshapeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto reshape = reinterpret_cast<ReshapeInt8CPUKernel *>(cdata);
auto ret = reshape->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Reshapeint8Run task_id " << task_id << " failed.";
return ret;
}
reshape->DoExecute(task_id);
return lite::RET_OK;
}
int ReshapeInt8CPUKernel::DoExecute(int task_id) {
void ReshapeInt8CPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
if (real_dst_count <= 0) {
return lite::RET_OK;
return;
}
MS_ASSERT(input_data_);
MS_ASSERT(output_data_);
@ -82,7 +78,7 @@ int ReshapeInt8CPUKernel::DoExecute(int task_id) {
int8_t *cur_output_data = output_data_ + task_id * count_unit_;
Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_);
return lite::RET_OK;
return;
}
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reshape, LiteKernelCreator<ReshapeInt8CPUKernel>)

View File

@ -37,7 +37,7 @@ class ReshapeInt8CPUKernel : public InnerKernel {
int Init() override;
int ReSize() override;
int Run() override;
int DoExecute(int task_id);
void DoExecute(int task_id);
private:
int64_t elements_num_ = 0;

View File

@ -37,20 +37,32 @@ constexpr unsigned int OFFSET_BASE = 10;
} // namespace
void ResizeInt8CPUKernel::FreeResizeBiLinear() {
free(resize_quant_arg_.x_axis_index_);
resize_quant_arg_.x_axis_index_ = nullptr;
free(resize_quant_arg_.x_axis_lower_);
resize_quant_arg_.x_axis_lower_ = nullptr;
free(resize_quant_arg_.x_axis_upper_);
resize_quant_arg_.x_axis_upper_ = nullptr;
free(resize_quant_arg_.y_axis_index_);
resize_quant_arg_.y_axis_index_ = nullptr;
free(resize_quant_arg_.y_axis_lower_);
resize_quant_arg_.y_axis_lower_ = nullptr;
free(resize_quant_arg_.y_axis_upper_);
resize_quant_arg_.y_axis_upper_ = nullptr;
}
void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() {
free(resize_float_quant_arg_.x_axis_index_);
resize_float_quant_arg_.x_axis_index_ = nullptr;
free(resize_float_quant_arg_.x_axis_lower_);
resize_float_quant_arg_.x_axis_lower_ = nullptr;
free(resize_float_quant_arg_.x_axis_upper_);
resize_float_quant_arg_.x_axis_upper_ = nullptr;
free(resize_float_quant_arg_.y_axis_index_);
resize_float_quant_arg_.y_axis_index_ = nullptr;
free(resize_float_quant_arg_.y_axis_lower_);
resize_float_quant_arg_.y_axis_lower_ = nullptr;
free(resize_float_quant_arg_.y_axis_upper_);
resize_float_quant_arg_.y_axis_upper_ = nullptr;
}
ResizeInt8CPUKernel::~ResizeInt8CPUKernel() {

View File

@ -52,8 +52,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
QuantArg *quant_in_{nullptr};
QuantArg *quant_out_{nullptr};
QuantMulArg *multiplier_{nullptr};
ResizeQuantArg resize_quant_arg_;
ResizeFloatScaleQuantArg resize_float_quant_arg_;
ResizeQuantArg resize_quant_arg_ = {};
ResizeFloatScaleQuantArg resize_float_quant_arg_ = {};
};
} // namespace mindspore::kernel

View File

@ -64,7 +64,7 @@ int SqueezeInt8CPUKernel::Init() {
auto quant_params = output_tensor->quant_params();
MS_ASSERT(quant_params.size() == 1);
quant_squeeze_param_->out_quant_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
if (quant_squeeze_param_->in_quant_args_ == nullptr) {
if (quant_squeeze_param_->out_quant_args_ == nullptr) {
MS_LOG(ERROR) << "malloc QuantArg failed";
if (quant_squeeze_param_ != nullptr) {
if (quant_squeeze_param_->in_quant_args_ != nullptr) {
@ -97,15 +97,11 @@ int SqueezeInt8CPUKernel::Run() {
int SqueezeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
auto Squeeze = reinterpret_cast<SqueezeInt8CPUKernel *>(cdata);
auto ret = Squeeze->DoExecute(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SqueezeInt8Run task_id " << task_id << " failed.";
return ret;
}
Squeeze->DoExecute(task_id);
return RET_OK;
}
int SqueezeInt8CPUKernel::DoExecute(int task_id) {
void SqueezeInt8CPUKernel::DoExecute(int task_id) {
auto input_tensor = in_tensors_.at(kInputIndex);
MS_ASSERT(input_tensor);
auto out_tensor = out_tensors_.at(kOutputIndex);
@ -117,7 +113,6 @@ int SqueezeInt8CPUKernel::DoExecute(int task_id) {
int num = input_tensor->ElementsNum();
SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_);
return RET_OK;
}
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeInt8CPUKernel>)

View File

@ -36,7 +36,7 @@ class SqueezeInt8CPUKernel : public InnerKernel {
int Init() override;
int ReSize() override;
int Run() override;
int DoExecute(int tId);
void DoExecute(int tId);
private:
SqueezeQuantArg *quant_squeeze_param_{nullptr};

View File

@ -46,7 +46,7 @@ class TanhInt8CPUKernel : public InnerKernel {
int element_size_{0};
int thread_count_{0};
int thread_stride_{0};
TanhQuantParameter tanh_quant_;
TanhQuantParameter tanh_quant_ = {};
};
} // namespace mindspore::kernel

View File

@ -79,7 +79,7 @@ int TransposeInt8CPUKernel::DoTranspose(int task_id) {
return RET_OK;
}
void TransposeInt8CPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor,
void TransposeInt8CPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
const TransposeParameter *param) {
auto out_shape = out_tensor->shape();
if (in_tensor->shape().size() == DIMENSION_4D && param->perm_[0] == 0 && param->perm_[1] == 2 &&

View File

@ -44,7 +44,8 @@ class TransposeInt8CPUKernel : public InnerKernel {
int DoTranspose(int task_id);
private:
void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, const TransposeParameter *param);
void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
const TransposeParameter *param);
TransposeParameter *transpose_param_;
TransposeFunc NHNCTransposeFunc_ = nullptr;
int8_t *in_ptr_ = nullptr;