From a5bd25482132dbb114608aecc60836f4c2dd2da9 Mon Sep 17 00:00:00 2001 From: lixian <179220644@qq.com> Date: Wed, 19 Aug 2020 15:38:13 +0800 Subject: [PATCH] add fp32 sliding window kernel --- .../runtime/kernel/arm/fp32/convolution.cc | 3 +- .../nnacl/assembly/arm64/ConvDwFp32Center.S | 8 +- .../nnacl/assembly/arm64/ConvDwInt8Center.S | 8 +- .../arm/nnacl/assembly/arm64/ConvFp32Center.S | 446 ++++++++++++++++++ .../kernel/arm/nnacl/fp32/common_func.h | 5 + .../src/runtime/kernel/arm/nnacl/fp32/conv.c | 19 +- 6 files changed, 481 insertions(+), 8 deletions(-) create mode 100644 mindspore/lite/src/runtime/kernel/arm/nnacl/assembly/arm64/ConvFp32Center.S diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc index 2d8ba072aff..3e68a411494 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc @@ -258,8 +258,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector +#include "nnacl/fp32/common_func.h" #include "nnacl/winograd_transform.h" void SWBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width, @@ -83,6 +84,7 @@ void SWBorder(float *dst, const float *src, const float *weight, const float *bi } // height loop } +#ifndef ENABLE_ARM64 void SWCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int ic4, int in_sh_step, int in_sw_step, int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) { @@ -135,6 +137,7 @@ void SWCenter(float *dst, const float *src, const float *weight, const float *bi src_h += in_sh_step; } // dst_height loop } +#endif // fp32 sliding window void ConvSWFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *tmp_out_block, @@ -172,11 +175,23 @@ void ConvSWFp32(const float *input_data, const float *packed_weight, const float src_data + in_h_start * slidingWindow_param->in_h_step_ + in_w_start * slidingWindow_param->ic4_channel_; float *out_t = dst_data + slidingWindow_param->top_ * slidingWindow_param->out_h_step_ + slidingWindow_param->left_ * slidingWindow_param->block_channel_; +#ifdef ENABLE_ARM64 + ConvSwFp32Center(out_t, in_t, weight, bias, slidingWindow_param->bottom_ - slidingWindow_param->top_, + slidingWindow_param->right_ - slidingWindow_param->left_, conv_param->kernel_h_, + conv_param->kernel_w_, slidingWindow_param->out_h_step_ * sizeof(float), + slidingWindow_param->block_channel_ * sizeof(float), ic4, + slidingWindow_param->in_sh_step_ * sizeof(float), + slidingWindow_param->in_sw_step_ * sizeof(float), + slidingWindow_param->in_kh_step_ * sizeof(float), + slidingWindow_param->in_kw_step_ * sizeof(float), + conv_param->is_relu_, conv_param->is_relu6_); +#else SWCenter(out_t, in_t, weight, bias, slidingWindow_param->bottom_ - slidingWindow_param->top_, - slidingWindow_param->right_ - slidingWindow_param->left_, conv_param->kernel_h_, conv_param->kernel_w_, - slidingWindow_param->out_h_step_, slidingWindow_param->block_channel_, ic4, + slidingWindow_param->right_ - slidingWindow_param->left_, conv_param->kernel_h_, + conv_param->kernel_w_, slidingWindow_param->out_h_step_, slidingWindow_param->block_channel_, ic4, slidingWindow_param->in_sh_step_, slidingWindow_param->in_sw_step_, slidingWindow_param->in_kh_step_, slidingWindow_param->in_kw_step_, conv_param->is_relu_, conv_param->is_relu6_); +#endif } } // output C4 loop src += slidingWindow_param->in_step_;