!39851 [MSLITE][CPU] conv 1x1 slidewindows, runtime part code refactor

Merge pull request !39851 from Greatpan/conv_sw1x1_avx_master
This commit is contained in:
i-robot 2022-08-08 02:10:58 +00:00 committed by Gitee
commit 2b35007b0c
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 97 additions and 17 deletions

View File

@ -34,7 +34,7 @@
#include "src/litert/kernel/cpu/fp32/convolution_depthwise_indirect_fp32.h"
#endif
#ifdef ENABLE_AVX
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_fp32.h"
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_avx_fp32.h"
#endif
using mindspore::lite::KernelRegistrar;
@ -227,7 +227,7 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32NHWCKernelSelect()
#ifdef ENABLE_AVX
if (kernel == nullptr && CheckAvxUseSWConv(conv_param)) {
kernel = new (std::nothrow) kernel::ConvolutionSWCPUKernel(
kernel = new (std::nothrow) kernel::ConvolutionSWAVXCPUKernel(
op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->ms_context_),
origin_weight_, origin_bias_);
}

View File

@ -0,0 +1,43 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_AVX
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_avx_fp32.h"
#include "nnacl/fp32/conv_common_fp32.h"
#include "nnacl/fp32/conv_1x1_x86_fp32.h"
namespace mindspore::kernel {
// Configure the AVX tiling parameters: output channels are tiled in groups
// of C8NUM, and for the 1x1-kernel fast path the input channels are aligned
// to C8NUM as well. The *_res_ members record the leftover (unaligned) tail.
void ConvolutionSWAVXCPUKernel::InitGlobalVariable() {
  oc_tile_ = C8NUM;
  oc_res_ = conv_param_->output_channel_ % oc_tile_;
  const bool is_1x1_kernel = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1);
  if (is_1x1_kernel) {
    // The 1x1 convolution path packs input channels to the same C8NUM width.
    in_tile_ = C8NUM;
    ic_res_ = conv_param_->input_channel_ % in_tile_;
  }
}
// Run one task's share of the sliding-window convolution. Dispatches to the
// dedicated 1x1 AVX kernel when both spatial kernel dimensions are 1, and to
// the generic sliding-window AVX kernel otherwise. Always returns RET_OK.
int ConvolutionSWAVXCPUKernel::RunImpl(int task_id) {
  auto *weight = reinterpret_cast<float *>(packed_weight_);
  auto *bias = reinterpret_cast<float *>(bias_data_);
  const bool is_1x1_kernel = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1);
  if (is_1x1_kernel) {
    Conv1x1SWAVXFp32(input_data_, weight, bias, output_data_, task_id, conv_param_, slidingWindow_param_);
  } else {
    ConvSWFp32(input_data_, weight, bias, output_data_, task_id, conv_param_, slidingWindow_param_);
  }
  return RET_OK;
}
} // namespace mindspore::kernel
#endif // ENABLE_AVX

View File

@ -0,0 +1,35 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_
#ifdef ENABLE_AVX
#include <vector>
#include "src/litert/kernel/cpu/fp32/convolution_slidewindow_fp32.h"
namespace mindspore::kernel {
// AVX specialization of the sliding-window FP32 convolution kernel.
// Reuses the base ConvolutionSWCPUKernel machinery (prepare/resize/run,
// weight packing, tmp buffers) and overrides only the tiling setup and the
// per-task compute so the AVX (C8NUM-tiled) code paths are used.
class ConvolutionSWAVXCPUKernel : public ConvolutionSWCPUKernel {
public:
// Forwards all construction to the base kernel; no extra state is added here.
ConvolutionSWAVXCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
float *origin_weight, float *origin_bias)
: ConvolutionSWCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
// Sets the AVX channel tiling (oc_tile_/in_tile_ = C8NUM and their remainders).
void InitGlobalVariable() override;
// Per-task compute: dispatches to the 1x1 AVX kernel or the generic SW AVX kernel.
int RunImpl(int task_id) override;
};
} // namespace mindspore::kernel
#endif // ENABLE_AVX
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_CONVOLUTION_SLIDEWINDOW_AVX_FP32_H_

View File

@ -28,14 +28,19 @@ using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
int ConvolutionSWCPUKernel::Prepare() {
oc_tile_ = C8NUM;
void ConvolutionSWCPUKernel::InitGlobalVariable() {
oc_tile_ = C1NUM;
oc_res_ = conv_param_->output_channel_ % oc_tile_;
if (conv_param_->kernel_h_ == 1 && conv_param_->kernel_w_ == 1) {
// 1x1 conv is aligned to C8NUM
in_tile_ = C8NUM;
// 1x1 conv is aligned to C1NUM
in_tile_ = C1NUM;
ic_res_ = conv_param_->input_channel_ % in_tile_;
}
}
int ConvolutionSWCPUKernel::Prepare() {
InitGlobalVariable();
if (op_parameter_->is_train_session_) {
auto filter_tensor = in_tensors_.at(kWeightIndex);
CHECK_NULL_RETURN(filter_tensor);
@ -91,14 +96,8 @@ int ConvolutionSWCPUKernel::ReSize() {
}
int ConvolutionSWCPUKernel::RunImpl(int task_id) {
if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) {
Conv1x1SWAVXFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
} else {
ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
output_data_, task_id, conv_param_, slidingWindow_param_);
}
return RET_OK;
MS_LOG(ERROR) << "new SlidingWindow run fail, do not support slidewindows fp32 implement!";
return RET_ERROR;
}
int ConvolutionSWImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
@ -127,8 +126,8 @@ int ConvolutionSWCPUKernel::InitTmpBuffer() {
MS_LOG(ERROR) << "malloc tmp input_data_ failed.";
return RET_NULL_PTR;
}
PackNHWCToNHWC8Fp32(input_data, input_data_, conv_param_->input_batch_,
conv_param_->input_w_ * conv_param_->input_h_, conv_param_->input_channel_);
PackNHWCToNHWCXFp32(input_data, input_data_, conv_param_->input_batch_,
conv_param_->input_w_ * conv_param_->input_h_, conv_param_->input_channel_, oc_tile_);
} else {
input_data_ = input_data;
}

View File

@ -36,10 +36,11 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
}
}
virtual void InitGlobalVariable();
int Prepare() override;
int ReSize() override;
int Run() override;
int RunImpl(int task_id);
virtual int RunImpl(int task_id);
int InitTmpBuffer();
private:
@ -55,6 +56,8 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
input_data_ = nullptr;
}
}
protected:
int oc_tile_ = C8NUM; // oc tile is C8NUM in avx
int in_tile_ = 0; // input channel algin
int oc_res_ = 0;