!23653 [MSLITE][Develop] clean code check warnings

Merge pull request !23653 from yangruoqi713/codex
This commit is contained in:
i-robot 2021-09-18 01:26:21 +00:00 committed by Gitee
commit 6fd0e67a7f
12 changed files with 72 additions and 65 deletions

View File

@ -68,7 +68,7 @@ void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data
} }
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean, int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
float *out_deno, LayerNormParameter *param, size_t task_id) { float *out_deno, const LayerNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) { if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR; return NNACL_NULL_PTR;
} }

View File

@ -24,7 +24,7 @@ extern "C" {
#endif #endif
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean, int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
float *out_deno, LayerNormParameter *param, size_t task_id); float *out_deno, const LayerNormParameter *param, size_t task_id);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -154,20 +154,23 @@ void UpdataOutput(const float *cell_state, const float *output_gate, float *hidd
void UpdateLstmGate(float *gate_buffer, const float *input, const float *weight, const float *bias, int row, int deep, void UpdateLstmGate(float *gate_buffer, const float *input, const float *weight, const float *bias, int row, int deep,
int col, int col_align, bool is_vec, float *packed_ptr) { int col, int col_align, bool is_vec, float *packed_ptr) {
const float *weight_i = weight;
const float *bias_i = bias;
float *gate_i = gate_buffer;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
const float *weight_i; LstmMatMul(gate_i, input, weight_i, bias_i, row, deep, col, col_align, is_vec, packed_ptr);
#ifdef ENABLE_AVX #ifdef ENABLE_AVX
if (is_vec) { if (is_vec) {
weight_i = weight + deep * col_align * i; weight_i += deep * col_align;
} else { } else {
weight_i = weight + deep * col * i; weight_i += deep * col;
} }
#else #else
weight_i = weight + deep * col * i; weight_i += deep * col;
#endif #endif
const float *bias_i = bias + col_align * i; bias_i += col_align;
float *gate = gate_buffer + row * col * i; gate_i += row * col;
LstmMatMul(gate, input, weight_i, bias_i, row, deep, col, col_align, is_vec, packed_ptr);
} }
} }
@ -228,7 +231,7 @@ void LstmStepUnit(float *output, float *input_gate, float *forget_gate, float *c
void LstmUnidirectional(float *output, const float *packed_input, const float *weight_i, const float *weight_h, void LstmUnidirectional(float *output, const float *packed_input, const float *weight_i, const float *weight_h,
const float *input_bias, const float *state_bias, float *hidden_state, float *cell_state, const float *input_bias, const float *state_bias, float *hidden_state, float *cell_state,
float *buffer[6], const LstmParameter *lstm_param, bool is_backward) { float *buffer[7], const LstmParameter *lstm_param, bool is_backward) {
float *gate = buffer[1]; float *gate = buffer[1];
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
const float *weight_loop = weight_i + lstm_param->input_size_ * lstm_param->input_col_align_ * i; const float *weight_loop = weight_i + lstm_param->input_size_ * lstm_param->input_col_align_ * i;
@ -256,7 +259,7 @@ void LstmUnidirectional(float *output, const float *packed_input, const float *w
} }
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias, void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias,
const float *state_bias, float *hidden_state, float *cell_state, float *buffer[6], const float *state_bias, float *hidden_state, float *cell_state, float *buffer[7],
const LstmParameter *lstm_param) { const LstmParameter *lstm_param) {
// forward // forward
float *packed_input = buffer[0]; float *packed_input = buffer[0];

View File

@ -36,10 +36,10 @@ int ElementOptMulAcc(const float *input0, const float input1, float *output, con
void LstmStepUnit(float *output, float *input_gate, float *forget_gate, float *cell_gate, float *output_gate, void LstmStepUnit(float *output, float *input_gate, float *forget_gate, float *cell_gate, float *output_gate,
const float *state_weight, const float *state_bias, float *hidden_state, float *cell_state, const float *state_weight, const float *state_bias, float *hidden_state, float *cell_state,
float *buffer[6], const LstmParameter *lstm_param); float *buffer[7], const LstmParameter *lstm_param);
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias, void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias,
const float *state_bias, float *hidden_state, float *cell_state, float *buffer[6], const float *state_bias, float *hidden_state, float *cell_state, float *buffer[7],
const LstmParameter *lstm_param); const LstmParameter *lstm_param);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -34,17 +34,17 @@ void PostFuncInt8C4(const int32_t *in, const int32_t *bias, int8_t *out, size_t
void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels, void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels,
int output_channel, int input_step, int8_t input_zp); int output_channel, int input_step, int8_t input_zp);
void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp, void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp,
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t acc_min, const int32_t *out_multiplier, const int32_t *left_shift,
int32_t acc_max); const int32_t *right_shift, int32_t acc_min, int32_t acc_max);
void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier, void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier,
int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max); int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max);
void IndirectGemmInt16to32_8x4(int32_t *dst, const int16_t *src, const int16_t *weight, size_t ksize, size_t ic8, void IndirectGemmInt16to32_8x4(int32_t *dst, const int16_t *src, const int16_t *weight, size_t ksize, size_t ic8,
size_t oc4, size_t offset); size_t oc4, size_t offset);
void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height, void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height,
size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel,
size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, int8_t *in_zp, size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, const int8_t *in_zp,
int32_t *out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, const int32_t *out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
int32_t *acc_min, int32_t *acc_max); const int32_t *right_shift, const int32_t *acc_min, const int32_t *acc_max);
void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width, void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width,
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step); size_t in_sw_step, size_t in_kh_step, size_t in_kw_step);
@ -59,8 +59,8 @@ int32x4_t ClacScaledInput(int32x4_t input, int32x4_t left_shift_result_vec, int3
#ifdef ENABLE_ARM32 #ifdef ENABLE_ARM32
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t acc_min, const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
int32_t acc_max, size_t per_channel); int32_t acc_min, int32_t acc_max, size_t per_channel);
#endif #endif
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
@ -69,23 +69,24 @@ void PostFuncInt8C4Neon64(const int32_t *in, const int32_t *bias, int8_t *out, s
int32_t zp, int32_t mini, int32_t maxi); int32_t zp, int32_t mini, int32_t maxi);
void ConvDw3x3Int8Neon64(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias, void ConvDw3x3Int8Neon64(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp, int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
int32_t out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
int32_t acc_min, int32_t acc_max, size_t per_channel); const int32_t *right_shift, int32_t acc_min, int32_t acc_max, size_t per_channel);
void ConvDw3x3Int8Stride2(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias, void ConvDw3x3Int8Stride2(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp, int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
int32_t out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
int32_t acc_min, int32_t acc_max, size_t per_channel); const int32_t *right_shift, int32_t acc_min, int32_t acc_max, size_t per_channel);
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step, void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step,
size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, int32_t *out_multiplier, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, const int32_t *out_multiplier,
int32_t *left_shift, int32_t *right_shift, size_t acc_min, size_t acc_max, size_t per_channel); const int32_t *left_shift, const int32_t *right_shift, size_t acc_min, size_t acc_max,
size_t per_channel);
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias,
size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, size_t acc_min, const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
size_t acc_max, size_t per_channel); size_t acc_min, size_t acc_max, size_t per_channel);
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias,
size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, size_t acc_min, const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
size_t acc_max, size_t per_channel); size_t acc_min, size_t acc_max, size_t per_channel);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -33,8 +33,9 @@ void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *
} }
#endif #endif
void ConvDwInt8Post(int8_t *dst, int32_t *buffer, int output_w, int channel, int32_t output_zp, int32_t *out_multiplier, void ConvDwInt8Post(int8_t *dst, int32_t *buffer, int output_w, int channel, int32_t output_zp,
int32_t *left_shift, int32_t *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) { const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
int32_t acc_min, int32_t acc_max, bool per_channel) {
if (per_channel) { if (per_channel) {
// support perchannel // support perchannel
for (int w = 0; w < output_w; w++) { for (int w = 0; w < output_w; w++) {
@ -207,8 +208,8 @@ void ConvDw3x3Int8Window(int8_t *output, const int8_t *buffer, const int16_t *we
void ConvDw3x3Int8Block(int8_t *output, const int8_t *buffer, const int16_t *weight, const int32_t *bias, int start_c, void ConvDw3x3Int8Block(int8_t *output, const int8_t *buffer, const int16_t *weight, const int32_t *bias, int start_c,
int end_c, int col_size, int row_size, int channel, int output_h, int output_w, int8_t in_zp, int end_c, int col_size, int row_size, int channel, int output_h, int output_w, int8_t in_zp,
int32_t out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
int32_t acc_min, int32_t acc_max, int stride, bool per_channel) { const int32_t *right_shift, int32_t acc_min, int32_t acc_max, int stride, bool per_channel) {
for (; start_c <= end_c - 8; start_c += 8) { for (; start_c <= end_c - 8; start_c += 8) {
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
if (stride == 1) { if (stride == 1) {
@ -330,8 +331,8 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data
#ifndef ENABLE_ARM32 #ifndef ENABLE_ARM32
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t acc_min, const int *out_multiplier, const int *left_shift, const int *right_shift,
int32_t acc_max, bool per_channel) { const int32_t acc_min, const int32_t acc_max, bool per_channel) {
for (int c = 0; c < channel; c += 8) { for (int c = 0; c < channel; c += 8) {
int tmp_buffer[8]; int tmp_buffer[8];
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
@ -385,22 +386,25 @@ void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *wei
#ifndef ENABLE_ARM64 #ifndef ENABLE_ARM64
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step, void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int *out_multiplier, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
int *left_shift, int *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) { const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
bool per_channel) {
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier, ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
left_shift, right_shift, acc_min, acc_max, per_channel); left_shift, right_shift, acc_min, acc_max, per_channel);
} }
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step, void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int *out_multiplier, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
int *left_shift, int *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) { const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
bool per_channel) {
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 3, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier, ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 3, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
left_shift, right_shift, acc_min, acc_max, per_channel); left_shift, right_shift, acc_min, acc_max, per_channel);
} }
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step, void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int *out_multiplier, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
int *left_shift, int *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) { const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
bool per_channel) {
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 3, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier, ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 3, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
left_shift, right_shift, acc_min, acc_max, per_channel); left_shift, right_shift, acc_min, acc_max, per_channel);
} }
@ -494,9 +498,9 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16
/*conv depthwise sliding window perchannel int8 begin*/ /*conv depthwise sliding window perchannel int8 begin*/
void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, int32_t *out_zp, int width, int in_kh_step, int in_kw_step, int kernel_w, const int8_t *input_zp,
const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t *acc_min, const int32_t *out_zp, const int *out_multiplier, const int *left_shift,
int32_t *acc_max) { const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
int tmp_buffer[C8NUM]; int tmp_buffer[C8NUM];
for (int i = 0; i < C8NUM; i++) { for (int i = 0; i < C8NUM; i++) {
tmp_buffer[i] = 0; tmp_buffer[i] = 0;
@ -531,7 +535,7 @@ void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight
void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom, void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom,
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, const int *left_shift, const int8_t *in_zp, const int32_t *out_zp, const int *out_multiplier, const int *left_shift,
const int *right_shift, int32_t *acc_min, int32_t *acc_max) { const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
int8_t *dst_h = dst + top * sliding->out_h_step_; int8_t *dst_h = dst + top * sliding->out_h_step_;
for (int oh = top; oh < bottom; oh++) { for (int oh = top; oh < bottom; oh++) {
@ -613,7 +617,7 @@ void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, con
#endif #endif
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, const int8_t *input_zp, const int32_t *output_zp, const ConvParameter *conv_param,
const SlidingWindowParam *sliding, int task_id) { const SlidingWindowParam *sliding, int task_id) {
NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_); NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_);
NNACL_CHECK_ZERO_RETURN(conv_param->dilation_w_); NNACL_CHECK_ZERO_RETURN(conv_param->dilation_w_);
@ -631,8 +635,8 @@ void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *
int *right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C8NUM; int *right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C8NUM;
int *acc_min = conv_param->conv_quant_arg_.out_act_min_ + oc * C8NUM; int *acc_min = conv_param->conv_quant_arg_.out_act_min_ + oc * C8NUM;
int *acc_max = conv_param->conv_quant_arg_.out_act_max_ + oc * C8NUM; int *acc_max = conv_param->conv_quant_arg_.out_act_max_ + oc * C8NUM;
int8_t *in_zp = input_zp + oc * C8NUM; const int8_t *in_zp = input_zp + oc * C8NUM;
int32_t *out_zp = output_zp + oc * C8NUM; const int32_t *out_zp = output_zp + oc * C8NUM;
ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);

View File

@ -35,7 +35,7 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data
int task_id); int task_id);
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, const int8_t *input_zp, const int32_t *output_zp, const ConvParameter *conv_param,
const SlidingWindowParam *sliding, int task_id); const SlidingWindowParam *sliding, int task_id);
void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *input_data, const int16_t *weight_data, void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *input_data, const int16_t *weight_data,

View File

@ -115,7 +115,7 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu
} }
#ifdef ENABLE_ARM #ifdef ENABLE_ARM
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_);
int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_);
@ -199,10 +199,10 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu
for (; j < depth; ++j) { for (; j < depth; ++j) {
const int32_t input0_val = zp1 + input0_data[j]; const int32_t input0_val = zp1 + input0_data[j];
const int32_t input1_val = zp2 + input1_data[0]; const int32_t input1_val = zp2 + input1_data[0];
int32_t mul_result = int32_t mul_result = RoundingDivideByPOT(
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_), SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << (size_t)quant_arg->shift_left_),
quant_arg->output_multiplier_), quant_arg->output_multiplier_),
quant_arg->shift_right_); quant_arg->shift_right_);
mul_result += quant_arg->out_quant_arg_.zp_; mul_result += quant_arg->out_quant_arg_.zp_;
mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_; mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_;
@ -224,10 +224,10 @@ void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_da
for (; index < real_dst_count; ++index) { for (; index < real_dst_count; ++index) {
const int32_t input0_val = quant_arg->in_quant_args_[0].zp_ + input0_data[index]; const int32_t input0_val = quant_arg->in_quant_args_[0].zp_ + input0_data[index];
const int32_t input1_val = quant_arg->in_quant_args_[1].zp_ + input1_data[index]; const int32_t input1_val = quant_arg->in_quant_args_[1].zp_ + input1_data[index];
int32_t mul_result = int32_t mul_result = RoundingDivideByPOT(
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_), SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << (size_t)quant_arg->shift_left_),
quant_arg->output_multiplier_), quant_arg->output_multiplier_),
quant_arg->shift_right_); quant_arg->shift_right_);
mul_result += quant_arg->out_quant_arg_.zp_; mul_result += quant_arg->out_quant_arg_.zp_;
mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_; mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_;

View File

@ -145,6 +145,7 @@ int LstmFP32Coder::MallocRunBuffer(CoderContext *const context) {
kNumberTypeFloat32, lstm_param_->batch_ * lstm_param_->hidden_size_ * sizeof(float), kWorkspace)); kNumberTypeFloat32, lstm_param_->batch_ * lstm_param_->hidden_size_ * sizeof(float), kWorkspace));
MS_CHECK_PTR(buffer_[5]); MS_CHECK_PTR(buffer_[5]);
} }
buffer_[6] = nullptr;
return RET_OK; return RET_OK;
} }

View File

@ -44,7 +44,7 @@ class LstmFP32Coder final : public OperatorCoder {
float *weight_h_ptr_{nullptr}; float *weight_h_ptr_{nullptr};
float *input_bias_{nullptr}; float *input_bias_{nullptr};
float *state_bias_{nullptr}; float *state_bias_{nullptr};
float *buffer_[6]; float *buffer_[7];
int row_tile_{0}; int row_tile_{0};
int col_tile_{0}; int col_tile_{0};
int weight_batch_{0}; int weight_batch_{0};

View File

@ -160,7 +160,7 @@ int NPUSubGraph::BuildNPUInputOp() {
auto in_tensor = op->inputs()[i]; auto in_tensor = op->inputs()[i];
if (IsSubGraphInputTensor(in_tensor)) { if (IsSubGraphInputTensor(in_tensor)) {
auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name(); auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
hiai::op::Data *data; hiai::op::Data *data = nullptr;
data = ConverterToNPUData(in_tensor, tensor_name); data = ConverterToNPUData(in_tensor, tensor_name);
subgraph_input_ops_.push_back(*data); subgraph_input_ops_.push_back(*data);
input_ops.push_back(data); input_ops.push_back(data);

View File

@ -98,13 +98,11 @@ inline void Transpose8X8Fp32Arm64(const float *src_ptr, float *dst_ptr, int src_
void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int channel) { void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int channel) {
int hw8 = plane / C8NUM * C8NUM; int hw8 = plane / C8NUM * C8NUM;
int task_start = 0;
int task_end = plane;
int batch = plane * channel; int batch = plane * channel;
for (int n = 0; n < batches; n++) { for (int n = 0; n < batches; n++) {
const float *src_batch = (const float *)src + n * batch; const float *src_batch = (const float *)src + n * batch;
float *dst_batch = reinterpret_cast<float *>(dst) + n * batch; float *dst_batch = reinterpret_cast<float *>(dst) + n * batch;
int hw = task_start; int hw = 0;
for (; hw < hw8; hw += C8NUM) { for (; hw < hw8; hw += C8NUM) {
int c = 0; int c = 0;
#ifdef ENABLE_ARM64 #ifdef ENABLE_ARM64
@ -122,7 +120,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
} }
} }
} }
for (; hw < task_end; hw++) { for (; hw < plane; hw++) {
const float *src_ptr = src_batch + hw * channel; const float *src_ptr = src_batch + hw * channel;
float *dst_ptr = dst_batch + hw; float *dst_ptr = dst_batch + hw;
for (size_t i = 0; i < channel; i++) { for (size_t i = 0; i < channel; i++) {