forked from mindspore-Ecosystem/mindspore
!23653 [MSLITE][Develop] clean code check warnings
Merge pull request !23653 from yangruoqi713/codex
This commit is contained in:
commit
6fd0e67a7f
|
@ -68,7 +68,7 @@ void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data
|
||||||
}
|
}
|
||||||
|
|
||||||
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
|
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
|
||||||
float *out_deno, LayerNormParameter *param, size_t task_id) {
|
float *out_deno, const LayerNormParameter *param, size_t task_id) {
|
||||||
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
|
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
|
||||||
return NNACL_NULL_PTR;
|
return NNACL_NULL_PTR;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
|
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
|
||||||
float *out_deno, LayerNormParameter *param, size_t task_id);
|
float *out_deno, const LayerNormParameter *param, size_t task_id);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -154,20 +154,23 @@ void UpdataOutput(const float *cell_state, const float *output_gate, float *hidd
|
||||||
|
|
||||||
void UpdateLstmGate(float *gate_buffer, const float *input, const float *weight, const float *bias, int row, int deep,
|
void UpdateLstmGate(float *gate_buffer, const float *input, const float *weight, const float *bias, int row, int deep,
|
||||||
int col, int col_align, bool is_vec, float *packed_ptr) {
|
int col, int col_align, bool is_vec, float *packed_ptr) {
|
||||||
|
const float *weight_i = weight;
|
||||||
|
const float *bias_i = bias;
|
||||||
|
float *gate_i = gate_buffer;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
const float *weight_i;
|
LstmMatMul(gate_i, input, weight_i, bias_i, row, deep, col, col_align, is_vec, packed_ptr);
|
||||||
|
|
||||||
#ifdef ENABLE_AVX
|
#ifdef ENABLE_AVX
|
||||||
if (is_vec) {
|
if (is_vec) {
|
||||||
weight_i = weight + deep * col_align * i;
|
weight_i += deep * col_align;
|
||||||
} else {
|
} else {
|
||||||
weight_i = weight + deep * col * i;
|
weight_i += deep * col;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
weight_i = weight + deep * col * i;
|
weight_i += deep * col;
|
||||||
#endif
|
#endif
|
||||||
const float *bias_i = bias + col_align * i;
|
bias_i += col_align;
|
||||||
float *gate = gate_buffer + row * col * i;
|
gate_i += row * col;
|
||||||
LstmMatMul(gate, input, weight_i, bias_i, row, deep, col, col_align, is_vec, packed_ptr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -228,7 +231,7 @@ void LstmStepUnit(float *output, float *input_gate, float *forget_gate, float *c
|
||||||
|
|
||||||
void LstmUnidirectional(float *output, const float *packed_input, const float *weight_i, const float *weight_h,
|
void LstmUnidirectional(float *output, const float *packed_input, const float *weight_i, const float *weight_h,
|
||||||
const float *input_bias, const float *state_bias, float *hidden_state, float *cell_state,
|
const float *input_bias, const float *state_bias, float *hidden_state, float *cell_state,
|
||||||
float *buffer[6], const LstmParameter *lstm_param, bool is_backward) {
|
float *buffer[7], const LstmParameter *lstm_param, bool is_backward) {
|
||||||
float *gate = buffer[1];
|
float *gate = buffer[1];
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
const float *weight_loop = weight_i + lstm_param->input_size_ * lstm_param->input_col_align_ * i;
|
const float *weight_loop = weight_i + lstm_param->input_size_ * lstm_param->input_col_align_ * i;
|
||||||
|
@ -256,7 +259,7 @@ void LstmUnidirectional(float *output, const float *packed_input, const float *w
|
||||||
}
|
}
|
||||||
|
|
||||||
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias,
|
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias,
|
||||||
const float *state_bias, float *hidden_state, float *cell_state, float *buffer[6],
|
const float *state_bias, float *hidden_state, float *cell_state, float *buffer[7],
|
||||||
const LstmParameter *lstm_param) {
|
const LstmParameter *lstm_param) {
|
||||||
// forward
|
// forward
|
||||||
float *packed_input = buffer[0];
|
float *packed_input = buffer[0];
|
||||||
|
|
|
@ -36,10 +36,10 @@ int ElementOptMulAcc(const float *input0, const float input1, float *output, con
|
||||||
|
|
||||||
void LstmStepUnit(float *output, float *input_gate, float *forget_gate, float *cell_gate, float *output_gate,
|
void LstmStepUnit(float *output, float *input_gate, float *forget_gate, float *cell_gate, float *output_gate,
|
||||||
const float *state_weight, const float *state_bias, float *hidden_state, float *cell_state,
|
const float *state_weight, const float *state_bias, float *hidden_state, float *cell_state,
|
||||||
float *buffer[6], const LstmParameter *lstm_param);
|
float *buffer[7], const LstmParameter *lstm_param);
|
||||||
|
|
||||||
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias,
|
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *input_bias,
|
||||||
const float *state_bias, float *hidden_state, float *cell_state, float *buffer[6],
|
const float *state_bias, float *hidden_state, float *cell_state, float *buffer[7],
|
||||||
const LstmParameter *lstm_param);
|
const LstmParameter *lstm_param);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,17 +34,17 @@ void PostFuncInt8C4(const int32_t *in, const int32_t *bias, int8_t *out, size_t
|
||||||
void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels,
|
void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels,
|
||||||
int output_channel, int input_step, int8_t input_zp);
|
int output_channel, int input_step, int8_t input_zp);
|
||||||
void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp,
|
void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp,
|
||||||
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t acc_min,
|
const int32_t *out_multiplier, const int32_t *left_shift,
|
||||||
int32_t acc_max);
|
const int32_t *right_shift, int32_t acc_min, int32_t acc_max);
|
||||||
void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier,
|
void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier,
|
||||||
int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max);
|
int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max);
|
||||||
void IndirectGemmInt16to32_8x4(int32_t *dst, const int16_t *src, const int16_t *weight, size_t ksize, size_t ic8,
|
void IndirectGemmInt16to32_8x4(int32_t *dst, const int16_t *src, const int16_t *weight, size_t ksize, size_t ic8,
|
||||||
size_t oc4, size_t offset);
|
size_t oc4, size_t offset);
|
||||||
void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height,
|
void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height,
|
||||||
size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel,
|
size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel,
|
||||||
size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, int8_t *in_zp,
|
size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, const int8_t *in_zp,
|
||||||
int32_t *out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift,
|
const int32_t *out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
|
||||||
int32_t *acc_min, int32_t *acc_max);
|
const int32_t *right_shift, const int32_t *acc_min, const int32_t *acc_max);
|
||||||
void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width,
|
void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width,
|
||||||
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
|
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
|
||||||
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step);
|
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step);
|
||||||
|
@ -59,8 +59,8 @@ int32x4_t ClacScaledInput(int32x4_t input, int32x4_t left_shift_result_vec, int3
|
||||||
#ifdef ENABLE_ARM32
|
#ifdef ENABLE_ARM32
|
||||||
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||||
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
|
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
|
||||||
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t acc_min,
|
const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
|
||||||
int32_t acc_max, size_t per_channel);
|
int32_t acc_min, int32_t acc_max, size_t per_channel);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef ENABLE_ARM64
|
#ifdef ENABLE_ARM64
|
||||||
|
@ -69,23 +69,24 @@ void PostFuncInt8C4Neon64(const int32_t *in, const int32_t *bias, int8_t *out, s
|
||||||
int32_t zp, int32_t mini, int32_t maxi);
|
int32_t zp, int32_t mini, int32_t maxi);
|
||||||
void ConvDw3x3Int8Neon64(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
|
void ConvDw3x3Int8Neon64(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
|
||||||
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
|
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
|
||||||
int32_t out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift,
|
int32_t out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
|
||||||
int32_t acc_min, int32_t acc_max, size_t per_channel);
|
const int32_t *right_shift, int32_t acc_min, int32_t acc_max, size_t per_channel);
|
||||||
void ConvDw3x3Int8Stride2(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
|
void ConvDw3x3Int8Stride2(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
|
||||||
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
|
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
|
||||||
int32_t out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift,
|
int32_t out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
|
||||||
int32_t acc_min, int32_t acc_max, size_t per_channel);
|
const int32_t *right_shift, int32_t acc_min, int32_t acc_max, size_t per_channel);
|
||||||
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step,
|
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step,
|
||||||
size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, int32_t *out_multiplier,
|
size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, const int32_t *out_multiplier,
|
||||||
int32_t *left_shift, int32_t *right_shift, size_t acc_min, size_t acc_max, size_t per_channel);
|
const int32_t *left_shift, const int32_t *right_shift, size_t acc_min, size_t acc_max,
|
||||||
|
size_t per_channel);
|
||||||
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias,
|
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias,
|
||||||
size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
|
size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
|
||||||
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, size_t acc_min,
|
const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
|
||||||
size_t acc_max, size_t per_channel);
|
size_t acc_min, size_t acc_max, size_t per_channel);
|
||||||
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias,
|
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias,
|
||||||
size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
|
size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
|
||||||
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, size_t acc_min,
|
const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
|
||||||
size_t acc_max, size_t per_channel);
|
size_t acc_min, size_t acc_max, size_t per_channel);
|
||||||
#endif
|
#endif
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,8 +33,9 @@ void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ConvDwInt8Post(int8_t *dst, int32_t *buffer, int output_w, int channel, int32_t output_zp, int32_t *out_multiplier,
|
void ConvDwInt8Post(int8_t *dst, int32_t *buffer, int output_w, int channel, int32_t output_zp,
|
||||||
int32_t *left_shift, int32_t *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) {
|
const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
|
||||||
|
int32_t acc_min, int32_t acc_max, bool per_channel) {
|
||||||
if (per_channel) {
|
if (per_channel) {
|
||||||
// support perchannel
|
// support perchannel
|
||||||
for (int w = 0; w < output_w; w++) {
|
for (int w = 0; w < output_w; w++) {
|
||||||
|
@ -207,8 +208,8 @@ void ConvDw3x3Int8Window(int8_t *output, const int8_t *buffer, const int16_t *we
|
||||||
|
|
||||||
void ConvDw3x3Int8Block(int8_t *output, const int8_t *buffer, const int16_t *weight, const int32_t *bias, int start_c,
|
void ConvDw3x3Int8Block(int8_t *output, const int8_t *buffer, const int16_t *weight, const int32_t *bias, int start_c,
|
||||||
int end_c, int col_size, int row_size, int channel, int output_h, int output_w, int8_t in_zp,
|
int end_c, int col_size, int row_size, int channel, int output_h, int output_w, int8_t in_zp,
|
||||||
int32_t out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift,
|
int32_t out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
|
||||||
int32_t acc_min, int32_t acc_max, int stride, bool per_channel) {
|
const int32_t *right_shift, int32_t acc_min, int32_t acc_max, int stride, bool per_channel) {
|
||||||
for (; start_c <= end_c - 8; start_c += 8) {
|
for (; start_c <= end_c - 8; start_c += 8) {
|
||||||
#ifdef ENABLE_ARM64
|
#ifdef ENABLE_ARM64
|
||||||
if (stride == 1) {
|
if (stride == 1) {
|
||||||
|
@ -330,8 +331,8 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data
|
||||||
#ifndef ENABLE_ARM32
|
#ifndef ENABLE_ARM32
|
||||||
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||||
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
|
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
|
||||||
const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t acc_min,
|
const int *out_multiplier, const int *left_shift, const int *right_shift,
|
||||||
int32_t acc_max, bool per_channel) {
|
const int32_t acc_min, const int32_t acc_max, bool per_channel) {
|
||||||
for (int c = 0; c < channel; c += 8) {
|
for (int c = 0; c < channel; c += 8) {
|
||||||
int tmp_buffer[8];
|
int tmp_buffer[8];
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
|
@ -385,22 +386,25 @@ void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *wei
|
||||||
|
|
||||||
#ifndef ENABLE_ARM64
|
#ifndef ENABLE_ARM64
|
||||||
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
||||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int *out_multiplier,
|
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
|
||||||
int *left_shift, int *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) {
|
const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
|
||||||
|
bool per_channel) {
|
||||||
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
||||||
left_shift, right_shift, acc_min, acc_max, per_channel);
|
left_shift, right_shift, acc_min, acc_max, per_channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
||||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int *out_multiplier,
|
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
|
||||||
int *left_shift, int *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) {
|
const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
|
||||||
|
bool per_channel) {
|
||||||
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 3, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 3, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
||||||
left_shift, right_shift, acc_min, acc_max, per_channel);
|
left_shift, right_shift, acc_min, acc_max, per_channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
||||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, int *out_multiplier,
|
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
|
||||||
int *left_shift, int *right_shift, int32_t acc_min, int32_t acc_max, bool per_channel) {
|
const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
|
||||||
|
bool per_channel) {
|
||||||
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 3, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 3, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
||||||
left_shift, right_shift, acc_min, acc_max, per_channel);
|
left_shift, right_shift, acc_min, acc_max, per_channel);
|
||||||
}
|
}
|
||||||
|
@ -494,9 +498,9 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16
|
||||||
|
|
||||||
/*conv depthwise sliding window perchannel int8 begin*/
|
/*conv depthwise sliding window perchannel int8 begin*/
|
||||||
void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||||
int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, int32_t *out_zp,
|
int width, int in_kh_step, int in_kw_step, int kernel_w, const int8_t *input_zp,
|
||||||
const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t *acc_min,
|
const int32_t *out_zp, const int *out_multiplier, const int *left_shift,
|
||||||
int32_t *acc_max) {
|
const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
|
||||||
int tmp_buffer[C8NUM];
|
int tmp_buffer[C8NUM];
|
||||||
for (int i = 0; i < C8NUM; i++) {
|
for (int i = 0; i < C8NUM; i++) {
|
||||||
tmp_buffer[i] = 0;
|
tmp_buffer[i] = 0;
|
||||||
|
@ -531,7 +535,7 @@ void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight
|
||||||
|
|
||||||
void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom,
|
void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom,
|
||||||
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
|
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
|
||||||
int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, const int *left_shift,
|
const int8_t *in_zp, const int32_t *out_zp, const int *out_multiplier, const int *left_shift,
|
||||||
const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
|
const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
|
||||||
int8_t *dst_h = dst + top * sliding->out_h_step_;
|
int8_t *dst_h = dst + top * sliding->out_h_step_;
|
||||||
for (int oh = top; oh < bottom; oh++) {
|
for (int oh = top; oh < bottom; oh++) {
|
||||||
|
@ -613,7 +617,7 @@ void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, con
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
||||||
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param,
|
const int8_t *input_zp, const int32_t *output_zp, const ConvParameter *conv_param,
|
||||||
const SlidingWindowParam *sliding, int task_id) {
|
const SlidingWindowParam *sliding, int task_id) {
|
||||||
NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_);
|
NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_);
|
||||||
NNACL_CHECK_ZERO_RETURN(conv_param->dilation_w_);
|
NNACL_CHECK_ZERO_RETURN(conv_param->dilation_w_);
|
||||||
|
@ -631,8 +635,8 @@ void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *
|
||||||
int *right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C8NUM;
|
int *right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C8NUM;
|
||||||
int *acc_min = conv_param->conv_quant_arg_.out_act_min_ + oc * C8NUM;
|
int *acc_min = conv_param->conv_quant_arg_.out_act_min_ + oc * C8NUM;
|
||||||
int *acc_max = conv_param->conv_quant_arg_.out_act_max_ + oc * C8NUM;
|
int *acc_max = conv_param->conv_quant_arg_.out_act_max_ + oc * C8NUM;
|
||||||
int8_t *in_zp = input_zp + oc * C8NUM;
|
const int8_t *in_zp = input_zp + oc * C8NUM;
|
||||||
int32_t *out_zp = output_zp + oc * C8NUM;
|
const int32_t *out_zp = output_zp + oc * C8NUM;
|
||||||
|
|
||||||
ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
|
ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
|
||||||
sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||||
|
|
|
@ -35,7 +35,7 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data
|
||||||
int task_id);
|
int task_id);
|
||||||
|
|
||||||
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
||||||
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param,
|
const int8_t *input_zp, const int32_t *output_zp, const ConvParameter *conv_param,
|
||||||
const SlidingWindowParam *sliding, int task_id);
|
const SlidingWindowParam *sliding, int task_id);
|
||||||
|
|
||||||
void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *input_data, const int16_t *weight_data,
|
void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *input_data, const int16_t *weight_data,
|
||||||
|
|
|
@ -115,7 +115,7 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu
|
||||||
}
|
}
|
||||||
#ifdef ENABLE_ARM
|
#ifdef ENABLE_ARM
|
||||||
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
|
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
|
||||||
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_);
|
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
|
||||||
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
|
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
|
||||||
int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_);
|
int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_);
|
||||||
int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_);
|
int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_);
|
||||||
|
@ -199,10 +199,10 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu
|
||||||
for (; j < depth; ++j) {
|
for (; j < depth; ++j) {
|
||||||
const int32_t input0_val = zp1 + input0_data[j];
|
const int32_t input0_val = zp1 + input0_data[j];
|
||||||
const int32_t input1_val = zp2 + input1_data[0];
|
const int32_t input1_val = zp2 + input1_data[0];
|
||||||
int32_t mul_result =
|
int32_t mul_result = RoundingDivideByPOT(
|
||||||
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_),
|
SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << (size_t)quant_arg->shift_left_),
|
||||||
quant_arg->output_multiplier_),
|
quant_arg->output_multiplier_),
|
||||||
quant_arg->shift_right_);
|
quant_arg->shift_right_);
|
||||||
|
|
||||||
mul_result += quant_arg->out_quant_arg_.zp_;
|
mul_result += quant_arg->out_quant_arg_.zp_;
|
||||||
mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_;
|
mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_;
|
||||||
|
@ -224,10 +224,10 @@ void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_da
|
||||||
for (; index < real_dst_count; ++index) {
|
for (; index < real_dst_count; ++index) {
|
||||||
const int32_t input0_val = quant_arg->in_quant_args_[0].zp_ + input0_data[index];
|
const int32_t input0_val = quant_arg->in_quant_args_[0].zp_ + input0_data[index];
|
||||||
const int32_t input1_val = quant_arg->in_quant_args_[1].zp_ + input1_data[index];
|
const int32_t input1_val = quant_arg->in_quant_args_[1].zp_ + input1_data[index];
|
||||||
int32_t mul_result =
|
int32_t mul_result = RoundingDivideByPOT(
|
||||||
RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << quant_arg->shift_left_),
|
SaturatingRoundingDoublingHighMul(input0_val * input1_val * (1 << (size_t)quant_arg->shift_left_),
|
||||||
quant_arg->output_multiplier_),
|
quant_arg->output_multiplier_),
|
||||||
quant_arg->shift_right_);
|
quant_arg->shift_right_);
|
||||||
|
|
||||||
mul_result += quant_arg->out_quant_arg_.zp_;
|
mul_result += quant_arg->out_quant_arg_.zp_;
|
||||||
mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_;
|
mul_result = mul_result < quant_arg->output_activation_max_ ? mul_result : quant_arg->output_activation_max_;
|
||||||
|
|
|
@ -145,6 +145,7 @@ int LstmFP32Coder::MallocRunBuffer(CoderContext *const context) {
|
||||||
kNumberTypeFloat32, lstm_param_->batch_ * lstm_param_->hidden_size_ * sizeof(float), kWorkspace));
|
kNumberTypeFloat32, lstm_param_->batch_ * lstm_param_->hidden_size_ * sizeof(float), kWorkspace));
|
||||||
MS_CHECK_PTR(buffer_[5]);
|
MS_CHECK_PTR(buffer_[5]);
|
||||||
}
|
}
|
||||||
|
buffer_[6] = nullptr;
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ class LstmFP32Coder final : public OperatorCoder {
|
||||||
float *weight_h_ptr_{nullptr};
|
float *weight_h_ptr_{nullptr};
|
||||||
float *input_bias_{nullptr};
|
float *input_bias_{nullptr};
|
||||||
float *state_bias_{nullptr};
|
float *state_bias_{nullptr};
|
||||||
float *buffer_[6];
|
float *buffer_[7];
|
||||||
int row_tile_{0};
|
int row_tile_{0};
|
||||||
int col_tile_{0};
|
int col_tile_{0};
|
||||||
int weight_batch_{0};
|
int weight_batch_{0};
|
||||||
|
|
|
@ -160,7 +160,7 @@ int NPUSubGraph::BuildNPUInputOp() {
|
||||||
auto in_tensor = op->inputs()[i];
|
auto in_tensor = op->inputs()[i];
|
||||||
if (IsSubGraphInputTensor(in_tensor)) {
|
if (IsSubGraphInputTensor(in_tensor)) {
|
||||||
auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
|
auto tensor_name = "Input_" + std::to_string(count++) + '_' + op->name();
|
||||||
hiai::op::Data *data;
|
hiai::op::Data *data = nullptr;
|
||||||
data = ConverterToNPUData(in_tensor, tensor_name);
|
data = ConverterToNPUData(in_tensor, tensor_name);
|
||||||
subgraph_input_ops_.push_back(*data);
|
subgraph_input_ops_.push_back(*data);
|
||||||
input_ops.push_back(data);
|
input_ops.push_back(data);
|
||||||
|
|
|
@ -98,13 +98,11 @@ inline void Transpose8X8Fp32Arm64(const float *src_ptr, float *dst_ptr, int src_
|
||||||
|
|
||||||
void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int channel) {
|
void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int channel) {
|
||||||
int hw8 = plane / C8NUM * C8NUM;
|
int hw8 = plane / C8NUM * C8NUM;
|
||||||
int task_start = 0;
|
|
||||||
int task_end = plane;
|
|
||||||
int batch = plane * channel;
|
int batch = plane * channel;
|
||||||
for (int n = 0; n < batches; n++) {
|
for (int n = 0; n < batches; n++) {
|
||||||
const float *src_batch = (const float *)src + n * batch;
|
const float *src_batch = (const float *)src + n * batch;
|
||||||
float *dst_batch = reinterpret_cast<float *>(dst) + n * batch;
|
float *dst_batch = reinterpret_cast<float *>(dst) + n * batch;
|
||||||
int hw = task_start;
|
int hw = 0;
|
||||||
for (; hw < hw8; hw += C8NUM) {
|
for (; hw < hw8; hw += C8NUM) {
|
||||||
int c = 0;
|
int c = 0;
|
||||||
#ifdef ENABLE_ARM64
|
#ifdef ENABLE_ARM64
|
||||||
|
@ -122,7 +120,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (; hw < task_end; hw++) {
|
for (; hw < plane; hw++) {
|
||||||
const float *src_ptr = src_batch + hw * channel;
|
const float *src_ptr = src_batch + hw * channel;
|
||||||
float *dst_ptr = dst_batch + hw;
|
float *dst_ptr = dst_batch + hw;
|
||||||
for (size_t i = 0; i < channel; i++) {
|
for (size_t i = 0; i < channel; i++) {
|
||||||
|
|
Loading…
Reference in New Issue