!5272 add opencl kernels to libmindspore-lite.so

Merge pull request !5272 from wandongdong/master
2020-08-26 20:35:08 +08:00 · 2020-08-26 20:35:08 +08:00 · 709be5eb4f
parent 275e286216 8d0082dfba
commit 709be5eb4f
7 changed files with 16 additions and 26 deletions
--- a/mindspore/lite/src/CMakeLists.txt
+++ b/mindspore/lite/src/CMakeLists.txt
@@ -36,11 +36,19 @@ file(GLOB_RECURSE C_OPS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cc)
 add_library(mindspore-lite SHARED ${LITE_SRC} ${C_OPS_SRC})
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
 add_library(anf OBJECT ${ANF_SRC})
+if (SUPPORT_GPU)
+  add_subdirectory(runtime/kernel/opencl)
+  target_link_libraries(mindspore-lite
+      anf
+      cpu_kernel_mid_
+      opencl_kernel_lib_
+      )
+else ()
  target_link_libraries(mindspore-lite
      anf
      cpu_kernel_mid_
      )
-
+endif ()
 add_subdirectory(runtime/kernel/arm)
 if (PLATFORM_ARM32 OR PLATFORM_ARM64)
  target_link_libraries(mindspore-lite log)
--- a/mindspore/lite/src/runtime/kernel/opencl/CMakeLists.txt
+++ b/mindspore/lite/src/runtime/kernel/opencl/CMakeLists.txt
@@ -1,16 +1,2 @@
-set(OPENCL_KERNEL_SRC
-    ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_opencl_kernel.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/utils.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/arithmetic.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/convolution.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/depthwise_conv2d.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/pooling2d.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/matmul.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/softmax.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/concat.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/conv2d_transpose.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/transpose.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/reshape.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/activation.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/kernel/caffe_prelu.cc
-    )
+file(GLOB_RECURSE OPENCL_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernel/*.cc)
+add_library(opencl_kernel_lib_ OBJECT ${KERNEL_SRC} ${OPENCL_KERNEL_SRC})
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
@@ -131,7 +131,6 @@ int ArithmeticOpenCLKernel::Run() {
  MS_LOG(DEBUG) << this->name() << " Running!";
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();

-  uint32_t element_num = out_tensors_[0]->ElementsC4Num();
  int arg_idx = 0;

  ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h
@@ -40,7 +40,6 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel {
  int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;

 private:
-  ConvParameter *parameter_;
  cl::Kernel kernel_;
  void *padWeight_;
  void *bias_;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
@@ -126,6 +126,7 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
    im_dst_y = h;
  } else {
    MS_LOG(ERROR) << "Unsupported format. " << out_tensors_[0]->GetFormat();
+    return RET_ERROR;
  }
  img_size->clear();
  auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable();
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
@@ -129,7 +129,6 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
    printf("%.3f ", output_data[i]);
  }
  printf("\n");
-  size_t output_size = tensor_out->Size();
  float expect[4] = {2.0f, 3.0f, 4.0f, 5.0f};

  for (int i = 0; i < tensor_out->ElementsNum(); ++i)
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
@@ -54,7 +54,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat
  }

  // pack weight
-  int OC4 = UP_DIV(conv_param->output_channel_, C4NUM);
  int pack_weight_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_;
  T1 *packed_weight = weight_data;

@@ -103,7 +102,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat

  std::vector<kernel::LiteKernel *> kernels{pKernel.get()};
  std::vector<lite::tensor::Tensor *> inputs_{&tensor_a};
-  size_t C4 = UP_DIV(inputs[0]->Channel(), C4NUM);
  auto pGraph = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs_, outputs, kernels, kernels, kernels);
  if (pGraph.get() == nullptr) {
    delete[] packed_input;