!8353 [MSLITE][Develop] optimize arm cpu int8 conv depthwise
From: @yangruoqi713 Reviewed-by: Signed-off-by:
This commit is contained in:
commit
3177a4245e
|
@ -1,4 +1,4 @@
|
|||
#ifdef __arm__
|
||||
#if 0
|
||||
#ifndef __aarch64__
|
||||
|
||||
.text
|
||||
|
|
|
@ -152,9 +152,8 @@ void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *
|
|||
}
|
||||
|
||||
/*conv depthwise fp32 begin*/
|
||||
#ifndef ENABLE_ARM64
|
||||
void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width,
|
||||
int in_kh_step, int in_kw_step, int kernel_w_step, bool is_relu, bool is_relu6) {
|
||||
void ConvDwBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width,
|
||||
int in_kh_step, int in_kw_step, int kernel_w_step, bool is_relu, bool is_relu6) {
|
||||
const float *src_kh = src;
|
||||
const float *weight_kh = weight;
|
||||
for (int c = 0; c < C4NUM; c++) {
|
||||
|
@ -179,10 +178,9 @@ void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, con
|
|||
dst[c] = (is_relu6) ? (MSMIN(6, MSMAX(0, dst[c]))) : (dst[c]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void DepthwiseBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom,
|
||||
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
void ConvDwBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left,
|
||||
int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
bool relu = conv_param->act_type_ == ActType_Relu;
|
||||
bool relu6 = conv_param->act_type_ == ActType_Relu6;
|
||||
float *dst_h = dst + top * sliding->out_h_step_;
|
||||
|
@ -207,8 +205,8 @@ void DepthwiseBorder(float *dst, const float *src, const float *weight, const fl
|
|||
sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float),
|
||||
conv_param->kernel_w_ * C4NUM * sizeof(float), relu, relu6);
|
||||
#else
|
||||
DepthwiseBorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM, relu, relu6);
|
||||
ConvDwBorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM, relu, relu6);
|
||||
#endif
|
||||
dst_kernel += sliding->block_channel_;
|
||||
} // width loop
|
||||
|
@ -217,9 +215,9 @@ void DepthwiseBorder(float *dst, const float *src, const float *weight, const fl
|
|||
}
|
||||
|
||||
#ifndef ENABLE_ARM64
|
||||
void DepthwiseCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width,
|
||||
int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step,
|
||||
int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) {
|
||||
void ConvDwCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width,
|
||||
int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step,
|
||||
int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) {
|
||||
float *dst_h = dst;
|
||||
const float *src_h = src;
|
||||
for (int oh = 0; oh < height; oh++) {
|
||||
|
@ -260,7 +258,7 @@ void DepthwiseCenter(float *dst, const float *src, const float *weight, const fl
|
|||
#endif
|
||||
|
||||
// conv depthwise fp32: sliding window
|
||||
void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) {
|
||||
bool relu = conv_param->act_type_ == ActType_Relu;
|
||||
bool relu6 = conv_param->act_type_ == ActType_Relu6;
|
||||
|
@ -272,14 +270,13 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig
|
|||
float *dst_data = dst + oc * C4NUM;
|
||||
const float *weight = weight_data + oc * sliding->kernel_step_;
|
||||
const float *bias = bias_data + oc * C4NUM;
|
||||
DepthwiseBorder(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
|
||||
sliding);
|
||||
DepthwiseBorder(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0,
|
||||
conv_param->output_w_, conv_param, sliding);
|
||||
DepthwiseBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param,
|
||||
sliding);
|
||||
DepthwiseBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->output_w_, conv_param, sliding);
|
||||
ConvDwBorder(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, sliding);
|
||||
ConvDwBorder(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, conv_param->output_w_,
|
||||
conv_param, sliding);
|
||||
ConvDwBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param,
|
||||
sliding);
|
||||
ConvDwBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->output_w_, conv_param, sliding);
|
||||
|
||||
if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) {
|
||||
int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -293,10 +290,10 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig
|
|||
sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float),
|
||||
sliding->in_kw_step_ * sizeof(float), relu, relu6);
|
||||
#else
|
||||
DepthwiseCenter(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_,
|
||||
sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, relu,
|
||||
relu6);
|
||||
ConvDwCenter(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_,
|
||||
sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, relu,
|
||||
relu6);
|
||||
#endif
|
||||
}
|
||||
} // output C4 loop
|
||||
|
@ -308,8 +305,8 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig
|
|||
/*conv depthwise fp32 end*/
|
||||
|
||||
/*deconv depthwise fp32 begin*/
|
||||
void DeconvDepthwiseBorderPixel(float *dst, const float *src, const float *weight, int height, int width,
|
||||
int in_kh_step, int in_kw_step, int kernel_w_step) {
|
||||
void DeconvDwBorderPixel(float *dst, const float *src, const float *weight, int height, int width, int in_kh_step,
|
||||
int in_kw_step, int kernel_w_step) {
|
||||
float *dst_kh = dst;
|
||||
const float *weight_kh = weight;
|
||||
for (int kh = 0; kh < height; kh++) {
|
||||
|
@ -335,8 +332,8 @@ void DeconvDepthwiseBorderPixel(float *dst, const float *src, const float *weigh
|
|||
} // kernel_h loop
|
||||
}
|
||||
|
||||
void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, int top, int bottom, int left, int right,
|
||||
const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
void DeconvDwBorder(float *dst, const float *src, const float *weight, int top, int bottom, int left, int right,
|
||||
const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
const float *src_h = src + top * sliding->out_h_step_;
|
||||
for (int ih = top; ih < bottom; ih++) {
|
||||
int oh = ih * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -358,8 +355,8 @@ void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, in
|
|||
sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float),
|
||||
conv_param->kernel_w_ * C4NUM * sizeof(float));
|
||||
#else
|
||||
DeconvDepthwiseBorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM);
|
||||
DeconvDwBorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM);
|
||||
#endif
|
||||
src_kernel += sliding->block_channel_;
|
||||
} // width loop
|
||||
|
@ -368,9 +365,9 @@ void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, in
|
|||
}
|
||||
|
||||
#ifndef ENABLE_ARM64
|
||||
void DeconvDepthwiseCenter(float *dst, const float *src, const float *weight, int height, int width, int kernel_h,
|
||||
int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step,
|
||||
int in_kh_step, int in_kw_step) {
|
||||
void DeconvDwCenter(float *dst, const float *src, const float *weight, int height, int width, int kernel_h,
|
||||
int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, int in_kh_step,
|
||||
int in_kw_step) {
|
||||
float *dst_h = dst;
|
||||
const float *src_h = src;
|
||||
for (int oh = 0; oh < height; oh++) {
|
||||
|
@ -401,7 +398,7 @@ void DeconvDepthwiseCenter(float *dst, const float *src, const float *weight, in
|
|||
}
|
||||
#endif
|
||||
|
||||
void DeconvDepthwisePostFunc(float *dst, const float *bias, int block_channel, const ConvParameter *conv_param) {
|
||||
void DeconvDwPost(float *dst, const float *bias, int block_channel, const ConvParameter *conv_param) {
|
||||
bool relu = conv_param->act_type_ == ActType_Relu;
|
||||
bool relu6 = conv_param->act_type_ == ActType_Relu6;
|
||||
float *dst_k = dst;
|
||||
|
@ -416,7 +413,7 @@ void DeconvDepthwisePostFunc(float *dst, const float *bias, int block_channel, c
|
|||
}
|
||||
|
||||
// deconv depthwise fp32: sliding window
|
||||
void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) {
|
||||
const float *src = input_data;
|
||||
float *dst = output_data;
|
||||
|
@ -426,13 +423,13 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we
|
|||
float *dst_data = dst + oc * C4NUM;
|
||||
const float *weight = weight_data + oc * sliding->kernel_step_;
|
||||
const float *bias = bias_data + oc * C4NUM;
|
||||
DeconvDepthwiseBorder(dst_data, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, sliding);
|
||||
DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, conv_param->input_w_,
|
||||
conv_param, sliding);
|
||||
DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param,
|
||||
sliding);
|
||||
DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->input_w_, conv_param, sliding);
|
||||
DeconvDwBorder(dst_data, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, sliding);
|
||||
DeconvDwBorder(dst_data, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, conv_param->input_w_,
|
||||
conv_param, sliding);
|
||||
DeconvDwBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param,
|
||||
sliding);
|
||||
DeconvDwBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, conv_param->input_w_,
|
||||
conv_param, sliding);
|
||||
|
||||
if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) {
|
||||
int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -447,13 +444,12 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we
|
|||
sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float),
|
||||
sliding->in_kw_step_ * sizeof(float));
|
||||
#else
|
||||
DeconvDepthwiseCenter(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_,
|
||||
sliding->block_channel_, sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_,
|
||||
sliding->in_kw_step_);
|
||||
DeconvDwCenter(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_,
|
||||
sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_);
|
||||
#endif
|
||||
}
|
||||
DeconvDepthwisePostFunc(dst_data, bias, sliding->block_channel_, conv_param);
|
||||
DeconvDwPost(dst_data, bias, sliding->block_channel_, conv_param);
|
||||
} // output C4 loop
|
||||
src += sliding->out_step_;
|
||||
dst += sliding->in_step_;
|
||||
|
|
|
@ -42,13 +42,10 @@ void InitSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *co
|
|||
|
||||
void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block);
|
||||
|
||||
void DepthwiseBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom,
|
||||
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding);
|
||||
|
||||
void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);
|
||||
|
||||
void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
|
||||
const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -139,7 +139,7 @@ void ConvDwInt8(int8_t *output_data, int32_t *row_buffer, const int8_t *input_da
|
|||
/*conv depthwise int8 end*/
|
||||
|
||||
/*conv depthwise 3x3 int8 begin*/
|
||||
bool CheckIfUse3X3(const ConvParameter *conv_param) {
|
||||
bool CheckConvDwInt8Use3X3(const ConvParameter *conv_param) {
|
||||
bool use_3x3 = conv_param->kernel_h_ == 3 && conv_param->kernel_w_ == 3 &&
|
||||
(conv_param->stride_h_ == 1 || conv_param->stride_h_ == 2) &&
|
||||
(conv_param->stride_w_ == 1 || conv_param->stride_w_ == 2) &&
|
||||
|
@ -158,8 +158,8 @@ bool CheckIfUse3X3(const ConvParameter *conv_param) {
|
|||
return use_3x3;
|
||||
}
|
||||
|
||||
void InitInputBuffer(int8_t *buffer, const int8_t *input, const ConvParameter *conv_param, int block_input_h,
|
||||
int block_input_w) {
|
||||
void ConvDw3x3Int8InitBuffer(int8_t *buffer, const int8_t *input, const ConvParameter *conv_param, int block_input_h,
|
||||
int block_input_w) {
|
||||
for (int h = 0; h < block_input_h; h++) {
|
||||
const int8_t *src = input;
|
||||
for (int w = 0; w < block_input_w; w++) {
|
||||
|
@ -257,7 +257,7 @@ void ConvDw3x3Int8Row(int8_t *output, int8_t *buffer, const int8_t *input, const
|
|||
const int32_t *bias_ptr = bias;
|
||||
int c = 0;
|
||||
for (; c <= conv_param->output_channel_ - 64; c += 64) {
|
||||
InitInputBuffer(buffer, input_ptr, conv_param, block_input_h, block_input_w);
|
||||
ConvDw3x3Int8InitBuffer(buffer, input_ptr, conv_param, block_input_h, block_input_w);
|
||||
ConvDw3x3Int8Block(output_ptr, buffer, weight_ptr, bias_ptr, 0, 64, 64, ih_offset, conv_param->input_channel_,
|
||||
block_output_h, block_output_w, in_zp, out_zp, out_multiplier, left_shift, right_shift,
|
||||
acc_min, acc_max, conv_param->stride_h_);
|
||||
|
@ -489,10 +489,10 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16
|
|||
/*conv depthwise 3x3 int8 end*/
|
||||
|
||||
/*conv depthwise sliding window perchannel int8 begin*/
|
||||
void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||
int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp,
|
||||
int32_t *out_zp, const int *out_multiplier, const int *left_shift, const int *right_shift,
|
||||
int32_t *acc_min, int32_t *acc_max) {
|
||||
void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||
int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, int32_t *out_zp,
|
||||
const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t *acc_min,
|
||||
int32_t *acc_max) {
|
||||
int tmp_buffer[C8NUM];
|
||||
for (int i = 0; i < C8NUM; i++) {
|
||||
tmp_buffer[i] = 0;
|
||||
|
@ -525,10 +525,10 @@ void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *wei
|
|||
}
|
||||
}
|
||||
|
||||
void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top,
|
||||
int bottom, int left, int right, const ConvParameter *conv_param,
|
||||
const SlidingWindowParam *sliding, int8_t *in_zp, int32_t *out_zp, const int *out_multiplier,
|
||||
const int *left_shift, const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
|
||||
void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom,
|
||||
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
|
||||
int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, const int *left_shift,
|
||||
const int *right_shift, int32_t *acc_min, int32_t *acc_max) {
|
||||
int8_t *dst_h = dst + top * sliding->out_h_step_;
|
||||
for (int oh = top; oh < bottom; oh++) {
|
||||
int ih = oh * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -546,9 +546,9 @@ void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight,
|
|||
const int8_t *src_kernel = src_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_;
|
||||
const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C8NUM;
|
||||
|
||||
DepthwiseBorderPixelInt8(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_, in_zp, out_zp,
|
||||
out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||
ConvDwInt8BorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_, in_zp, out_zp,
|
||||
out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||
|
||||
dst_kernel += sliding->block_channel_;
|
||||
} // width loop
|
||||
|
@ -556,12 +556,11 @@ void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight,
|
|||
} // height loop
|
||||
}
|
||||
|
||||
#ifndef ENABLE_ARM64
|
||||
void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||
int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step,
|
||||
int in_sw_step, int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp,
|
||||
int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t *acc_min,
|
||||
int32_t *acc_max) {
|
||||
#ifndef ENABLE_ARM
|
||||
void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, int width,
|
||||
int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step,
|
||||
int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp, int32_t *out_multiplier,
|
||||
int32_t *left_shift, int32_t *right_shift, int32_t *acc_min, int32_t *acc_max) {
|
||||
int tmp_buffer[C8NUM];
|
||||
int8_t *dst_h = dst;
|
||||
const int8_t *src_h = src;
|
||||
|
@ -608,7 +607,7 @@ void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight,
|
|||
}
|
||||
#endif
|
||||
|
||||
void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
||||
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
||||
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param,
|
||||
const SlidingWindowParam *sliding, int task_id) {
|
||||
const int8_t *src = input_data;
|
||||
|
@ -628,37 +627,26 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *
|
|||
int8_t *in_zp = input_zp + oc * C8NUM;
|
||||
int32_t *out_zp = output_zp + oc * C8NUM;
|
||||
|
||||
DepthwiseBorderInt8(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
|
||||
sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||
DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0,
|
||||
conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift,
|
||||
right_shift, acc_min, acc_max);
|
||||
DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_,
|
||||
conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min,
|
||||
acc_max);
|
||||
DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift,
|
||||
right_shift, acc_min, acc_max);
|
||||
ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param,
|
||||
sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||
ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0,
|
||||
conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift,
|
||||
right_shift, acc_min, acc_max);
|
||||
ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param,
|
||||
sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||
ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift,
|
||||
right_shift, acc_min, acc_max);
|
||||
|
||||
if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) {
|
||||
int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
int in_w_start = sliding->left_ * conv_param->stride_w_ - conv_param->pad_l_;
|
||||
const int8_t *in_t = src_data + in_h_start * sliding->in_h_step_ + in_w_start * sliding->block_channel_;
|
||||
int8_t *out_t = dst_data + sliding->top_ * sliding->out_h_step_ + sliding->left_ * sliding->block_channel_;
|
||||
#ifdef ENABLE_ARM
|
||||
ConvDwInt8Center(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_ * sizeof(int8_t),
|
||||
sliding->block_channel_ * sizeof(int8_t), sliding->in_sh_step_ * sizeof(int8_t),
|
||||
sliding->in_sw_step_ * sizeof(int8_t), sliding->in_kh_step_ * sizeof(int8_t),
|
||||
sliding->in_kw_step_ * sizeof(int8_t), in_zp, out_zp, out_multiplier, left_shift, right_shift,
|
||||
acc_min, acc_max);
|
||||
#else
|
||||
DepthwiseCenterInt8(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_,
|
||||
sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_,
|
||||
sliding->out_h_step_, sliding->block_channel_, sliding->in_sh_step_, sliding->in_sw_step_,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, in_zp, out_zp, out_multiplier, left_shift,
|
||||
right_shift, acc_min, acc_max);
|
||||
#endif
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_,
|
||||
sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, in_zp,
|
||||
out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max);
|
||||
}
|
||||
} // output C8 loop
|
||||
src += sliding->in_step_;
|
||||
|
@ -669,8 +657,8 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *
|
|||
/*conv depthwise sliding window perchannel int8 end*/
|
||||
|
||||
/*deconv depthwise int8 begin*/
|
||||
void DeconvDepthwiseBorderPixelInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width,
|
||||
int in_kh_step, int in_kw_step, int kernel_w) {
|
||||
void DeconvDwInt8BorderPixel(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width,
|
||||
int in_kh_step, int in_kw_step, int kernel_w) {
|
||||
int32_t *dst_kh = dst;
|
||||
const int16_t *weight_kh = weight;
|
||||
for (int kh = 0; kh < height; kh++) {
|
||||
|
@ -688,8 +676,8 @@ void DeconvDepthwiseBorderPixelInt8(int32_t *dst, const int16_t *src, const int1
|
|||
} // kernel_h loop
|
||||
}
|
||||
|
||||
void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int top, int bottom, int left,
|
||||
int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
void DeconvDwInt8Border(int32_t *dst, const int16_t *src, const int16_t *weight, int top, int bottom, int left,
|
||||
int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
const int16_t *src_h = src + top * sliding->out_h_step_;
|
||||
for (int ih = top; ih < bottom; ih++) {
|
||||
int oh = ih * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -707,8 +695,8 @@ void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t *
|
|||
const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C4NUM;
|
||||
int32_t *dst_kernel = dst_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_;
|
||||
|
||||
DeconvDepthwiseBorderPixelInt8(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_);
|
||||
DeconvDwInt8BorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw,
|
||||
sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_);
|
||||
src_kernel += sliding->block_channel_;
|
||||
} // width loop
|
||||
src_h += sliding->out_h_step_;
|
||||
|
@ -716,9 +704,9 @@ void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t *
|
|||
}
|
||||
|
||||
#ifndef ENABLE_ARM64
|
||||
void DeconvDepthwiseCenterInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width,
|
||||
int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step,
|
||||
int in_sw_step, int in_kh_step, int in_kw_step) {
|
||||
void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, int kernel_h,
|
||||
int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, int in_kh_step,
|
||||
int in_kw_step) {
|
||||
int32_t *dst_h = dst;
|
||||
const int16_t *src_h = src;
|
||||
for (int oh = 0; oh < height; oh++) {
|
||||
|
@ -784,14 +772,14 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in
|
|||
const int16_t *weight = weight_data + oc * sliding->kernel_step_;
|
||||
const int32_t *bias = bias_data + oc * C4NUM;
|
||||
int8_t *dst_data = dst + oc * C4NUM;
|
||||
DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param,
|
||||
sliding);
|
||||
DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->bottom_, conv_param->input_h_, 0,
|
||||
conv_param->input_w_, conv_param, sliding);
|
||||
DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_,
|
||||
conv_param, sliding);
|
||||
DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->input_w_, conv_param, sliding);
|
||||
DeconvDwInt8Border(output_buffer, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param,
|
||||
sliding);
|
||||
DeconvDwInt8Border(output_buffer, src_data, weight, sliding->bottom_, conv_param->input_h_, 0,
|
||||
conv_param->input_w_, conv_param, sliding);
|
||||
DeconvDwInt8Border(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_,
|
||||
conv_param, sliding);
|
||||
DeconvDwInt8Border(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_,
|
||||
conv_param->input_w_, conv_param, sliding);
|
||||
|
||||
if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) {
|
||||
int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -806,10 +794,9 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in
|
|||
sliding->in_sw_step_ * sizeof(int32_t), sliding->in_kh_step_ * sizeof(int32_t),
|
||||
sliding->in_kw_step_ * sizeof(int32_t));
|
||||
#else
|
||||
DeconvDepthwiseCenterInt8(out_t, in_t, weight, sliding->bottom_ - sliding->top_,
|
||||
sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_,
|
||||
sliding->out_h_step_, sliding->block_channel_, sliding->in_sh_step_,
|
||||
sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_);
|
||||
DeconvDwInt8Center(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
|
||||
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_,
|
||||
sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_);
|
||||
#endif
|
||||
}
|
||||
DeconvDwInt8Post(dst_data, output_buffer, bias, sliding->block_channel_,
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
bool CheckIfUse3X3(const ConvParameter *conv_param);
|
||||
bool CheckConvDwInt8Use3X3(const ConvParameter *conv_param);
|
||||
|
||||
void ConvDwInt8(int8_t *output_data, int32_t *output_row, const int8_t *input_data, const int16_t *weight_data,
|
||||
const int32_t *bias_data, const ConvParameter *conv_param, int task_id);
|
||||
|
@ -36,7 +36,7 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data
|
|||
const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
|
||||
int task_id);
|
||||
|
||||
void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
||||
void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
|
||||
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param,
|
||||
const SlidingWindowParam *sliding, int task_id);
|
||||
|
||||
|
|
|
@ -118,7 +118,7 @@ int ConvolutionDepthwiseSWCPUKernel::ReSize() {
|
|||
}
|
||||
|
||||
int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) {
|
||||
ConvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
|
||||
ConvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
|
||||
sliding_, task_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -130,7 +130,7 @@ int DeconvolutionDepthwiseCPUKernel::ReSize() {
|
|||
}
|
||||
|
||||
int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) {
|
||||
DeconvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
|
||||
DeconvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
|
||||
sliding_, task_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -99,6 +99,16 @@ int ConvolutionDepthwise3x3Int8CPUKernel::Init() {
|
|||
MS_LOG(ERROR) << "new sliding window param.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Set quant param failed.";
|
||||
return ret;
|
||||
}
|
||||
ret = InitWeightBias();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
|
||||
return ret;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -108,17 +118,7 @@ int ConvolutionDepthwise3x3Int8CPUKernel::Init() {
|
|||
int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() {
|
||||
ConvolutionBaseCPUKernel::Init();
|
||||
InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_);
|
||||
auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Set quant param failed.";
|
||||
return ret;
|
||||
}
|
||||
conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_);
|
||||
ret = InitWeightBias();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
|
||||
return ret;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -90,14 +90,6 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
|
|||
}
|
||||
|
||||
int ConvolutionDepthwiseInt8CPUKernel::Init() {
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
|
||||
ConvolutionBaseCPUKernel::Init();
|
||||
auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Set quant param failed.";
|
||||
|
@ -109,6 +101,14 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
|
|||
MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
|
||||
return ret;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
|
||||
ConvolutionBaseCPUKernel::Init();
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -181,7 +181,7 @@ kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *>
|
|||
conv_param->output_w_ = outputs[kOutputIndex]->Width();
|
||||
}
|
||||
auto weight_quant_size = inputs[kWeightIndex]->GetQuantParams().size();
|
||||
if (CheckIfUse3X3(conv_param) && weight_quant_size == 1) {
|
||||
if (CheckConvDwInt8Use3X3(conv_param) && weight_quant_size == 1) {
|
||||
#ifdef ENABLE_ARM64
|
||||
kernel =
|
||||
new (std::nothrow) kernel::ConvolutionDepthwise3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||
|
|
|
@ -275,16 +275,6 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Init() {
|
|||
MS_LOG(ERROR) << "new sliding window param.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() {
|
||||
ConvolutionBaseCPUKernel::Init();
|
||||
InitSlidingParamConvDw(sliding_, conv_param_, C8NUM);
|
||||
|
||||
auto ret = ConvolutionBaseCPUKernel::SetQuantParam();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Set quant param failed.";
|
||||
|
@ -295,17 +285,25 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() {
|
|||
MS_LOG(ERROR) << "reinit quant param failed.";
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = InitWeightBias();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!";
|
||||
return ret;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() {
|
||||
ConvolutionBaseCPUKernel::Init();
|
||||
InitSlidingParamConvDw(sliding_, conv_param_, C8NUM);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionDepthwiseSWInt8CPUKernel::Execute(int task_id) {
|
||||
ConvDwSWInt8(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), input_zp_,
|
||||
ConvDwInt8SW(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), input_zp_,
|
||||
output_zp_, conv_param_, sliding_, task_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue