diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc index 2d8ba072aff..3e68a411494 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc @@ -258,8 +258,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector +#include "nnacl/fp32/common_func.h" #include "nnacl/winograd_transform.h" void SWBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width, @@ -83,6 +84,7 @@ void SWBorder(float *dst, const float *src, const float *weight, const float *bi } // height loop } +#ifndef ENABLE_ARM64 void SWCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int ic4, int in_sh_step, int in_sw_step, int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) { @@ -135,6 +137,7 @@ void SWCenter(float *dst, const float *src, const float *weight, const float *bi src_h += in_sh_step; } // dst_height loop } +#endif // fp32 sliding window void ConvSWFp32(const float *input_data, const float *packed_weight, const float *bias_data, float *tmp_out_block, @@ -172,11 +175,23 @@ void ConvSWFp32(const float *input_data, const float *packed_weight, const float src_data + in_h_start * slidingWindow_param->in_h_step_ + in_w_start * slidingWindow_param->ic4_channel_; float *out_t = dst_data + slidingWindow_param->top_ * slidingWindow_param->out_h_step_ + slidingWindow_param->left_ * slidingWindow_param->block_channel_; +#ifdef ENABLE_ARM64 + ConvSwFp32Center(out_t, in_t, weight, bias, slidingWindow_param->bottom_ - slidingWindow_param->top_, + slidingWindow_param->right_ - slidingWindow_param->left_, conv_param->kernel_h_, + conv_param->kernel_w_, slidingWindow_param->out_h_step_ * sizeof(float), + slidingWindow_param->block_channel_ * sizeof(float), ic4, + slidingWindow_param->in_sh_step_ * sizeof(float), + slidingWindow_param->in_sw_step_ * sizeof(float), + slidingWindow_param->in_kh_step_ * sizeof(float), + slidingWindow_param->in_kw_step_ * sizeof(float), + conv_param->is_relu_, conv_param->is_relu6_); +#else SWCenter(out_t, in_t, weight, bias, slidingWindow_param->bottom_ - slidingWindow_param->top_, - slidingWindow_param->right_ - slidingWindow_param->left_, conv_param->kernel_h_, conv_param->kernel_w_, - slidingWindow_param->out_h_step_, slidingWindow_param->block_channel_, ic4, + slidingWindow_param->right_ - slidingWindow_param->left_, conv_param->kernel_h_, + conv_param->kernel_w_, slidingWindow_param->out_h_step_, slidingWindow_param->block_channel_, ic4, slidingWindow_param->in_sh_step_, slidingWindow_param->in_sw_step_, slidingWindow_param->in_kh_step_, slidingWindow_param->in_kw_step_, conv_param->is_relu_, conv_param->is_relu6_); +#endif } } // output C4 loop src += slidingWindow_param->in_step_;