!23957 [MS][LITE]Add Lite gitee enable

Merge pull request !23957 from gongdaguo/gitee_enable
2021-09-23 12:27:56 +00:00 · 2021-09-23 12:27:56 +00:00 · 73d75f6368
parent 79d1f3cfd3 60e1752ecd
commit 73d75f6368
4 changed files with 67 additions and 65 deletions
--- a/mindspore/lite/CMakeLists.txt
+++ b/mindspore/lite/CMakeLists.txt
@ -199,29 +199,6 @@ if(MSVC)
  set(MSLITE_ENABLE_CONVERTER off)
 endif()

-if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
-        NOT MSLITE_ENABLE_MINDRT
-        OR NOT MSLITE_STRING_KERNEL
-        OR NOT MSLITE_CONTROLFLOW_TENSORLIST
-        OR NOT MSLITE_WEIGHT_DECODE
-        OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY))
-    message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL "
-            "MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY'"
-            "is configured as off, MSLITE_ENABLE_CONVERTER and MSLITE_ENABLE_TESTCASES must also be configured as off")
-endif()
-
-if(((MSLITE_GPU_BACKEND STREQUAL tensorrt) OR MSLITE_ENABLE_NPU) AND (
-        NOT MSLITE_DELEGATE_USE))
-    message(FATAL_ERROR "If MSLITE_DELEGATE_USE use is configured as off, MSLITE_ENABLE_NPU must also be configured
-    as off and MSLITE_GPU_BACKEND nor can it be configured as tensorrt.")
-endif()
-
-if(MSLITE_ENABLE_FP16 AND PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
-        AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
-    message(FATAL_ERROR "If you want to build fp16 in arm82_a32, \
-    your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 and please use android nkd r21e!")
-endif()
-
 message(STATUS "************MindSpore Lite Build Option:************")
 message(STATUS "\tMSLITE_GPU_BACKEND                = \t${MSLITE_GPU_BACKEND}")
 message(STATUS "\tMSLITE_ENABLE_NPU                 = \t${MSLITE_ENABLE_NPU}")
@ -247,6 +224,37 @@ message(STATUS "\tMSLITE_ENABLE_FP16                = \t${MSLITE_ENABLE_FP16}")
 message(STATUS "\tMSLITE_ENABLE_MODEL_ENCRYPTION    = \t${MSLITE_ENABLE_MODEL_ENCRYPTION}")
 message(STATUS "\tMSLITE_ENABLE_SPARSE_COMPUTE      = \t${MSLITE_ENABLE_SPARSE_COMPUTE}")

+# Option sanity checks, placed after the build-option summary printed above.
+# Each check aborts configuration with FATAL_ERROR when an unsupported combination is requested.
+#
+# The converter and the test cases require every optional feature listed in the condition below.
+if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
+        NOT MSLITE_ENABLE_MINDRT
+        OR NOT MSLITE_STRING_KERNEL
+        OR NOT MSLITE_CONTROLFLOW_TENSORLIST
+        OR NOT MSLITE_WEIGHT_DECODE
+        OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY))
+    # message() joins its arguments without a separator, so each fragment carries its own spacing.
+    message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL "
+            "MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY'"
+            " is configured as off, MSLITE_ENABLE_CONVERTER and MSLITE_ENABLE_TESTCASES must also be configured as off")
+endif()
+
+# Enabling the NPU backend or selecting tensorrt as the GPU backend requires MSLITE_DELEGATE_USE.
+if(((MSLITE_GPU_BACKEND STREQUAL "tensorrt") OR MSLITE_ENABLE_NPU) AND (
+        NOT MSLITE_DELEGATE_USE))
+    message(FATAL_ERROR "If MSLITE_DELEGATE_USE is configured as off, MSLITE_ENABLE_NPU must also be configured "
+            "as off and MSLITE_GPU_BACKEND must not be configured as tensorrt.")
+endif()
+
+# fp16 on 32-bit ARM is rejected when the compiler is Clang older than 9.0.
+if(MSLITE_ENABLE_FP16 AND PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
+        AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
+    message(FATAL_ERROR "If you want to build fp16 in arm82_a32, "
+            "your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 "
+            "and please use android ndk r21e!")
+endif()
+
 if(MSLITE_ENABLE_HIGH_PERFORMANCE)
    add_compile_definitions(ENABLE_HIGH_PERFORMANCE)
 endif()
--- a/mindspore/lite/build_lite.sh
+++ b/mindspore/lite/build_lite.sh
@ -64,7 +64,7 @@ build_lite_x86_64_jni_and_jar() {
    rm -rf java/jni && mkdir -pv java/jni
    cd java/jni
    cmake -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-          -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DSUPPORT_TRAIN=${is_train} "${LITE_JAVA_PATH}/native/"
+          $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DSUPPORT_TRAIN=${is_train} "${LITE_JAVA_PATH}/native/"
    make -j$THREAD_NUM
    if [[ $? -ne 0 ]]; then
        echo "---------------- mindspore lite: build jni x86_64 failed----------------"
@ -163,7 +163,7 @@ build_lite() {
        echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
        cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19"         \
              -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang"     \
-              -DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+              -DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
              -DPLATFORM_ARM64="on" -DENABLE_NEON="on" -DMSLITE_ENABLE_FP16="on" -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp           \
              -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION}   \
              -DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
@ -180,13 +180,13 @@ build_lite() {
        echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
        cmake -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DTOOLCHAIN_NAME=${CMAKE_TOOLCHAIN_NAME} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL}          \
              -DANDROID_NDK=${CMAKE_ANDROID_NDK} -DANDROID_ABI=${CMAKE_ANDROID_ABI} -DANDROID_TOOLCHAIN_NAME=${CMAKE_ANDROID_TOOLCHAIN_NAME}                    \
-              -DANDROID_STL=${CMAKE_ANDROID_STL}  -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+              -DANDROID_STL=${CMAKE_ANDROID_STL}  $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
              -DPLATFORM_ARM32="on" -DENABLE_NEON="on"  -DMSLITE_ENABLE_FP16=${MSLITE_ENABLE_FP16} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp           \
              -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION}    \
              -DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
      fi
    else
-        cmake -DPLATFORM_X86_64=on -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE}              \
+        cmake -DPLATFORM_X86_64=on $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE}              \
              -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
              -DENABLE_ASAN=${ENABLE_ASAN} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
    fi
@ -282,7 +282,7 @@ build_lite_arm64_and_jni() {
    cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19"      \
          -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang"  \
          -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-          -DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
+          -DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
          -DSUPPORT_TRAIN=${is_train} -DPLATFORM_ARM64=on "${LITE_JAVA_PATH}/native/"
    make -j$THREAD_NUM
    if [[ $? -ne 0 ]]; then
@ -326,7 +326,7 @@ build_lite_arm32_and_jni() {
    cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19"      \
          -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="armeabi-v7a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang"  \
          -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-          -DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
+          -DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
          -DSUPPORT_TRAIN=${is_train} -DPLATFORM_ARM32=on "${LITE_JAVA_PATH}/native"
    make -j$THREAD_NUM
    if [[ $? -ne 0 ]]; then
@ -397,13 +397,19 @@ update_submodule()
 }

 LITE_JAVA_PATH=${BASEPATH}/mindspore/lite/java
-LITE_BUILD_TYPE="Release"
 if [[ "${MSLITE_ENABLE_ACL}" == "on" ]]; then
    update_submodule
 fi
+
 if [[ "${DEBUG_MODE}" == "on" ]]; then
-    LITE_BUILD_TYPE="Debug"
+    CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug "
+else
+    CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release "
 fi
+if [[ "X$ENABLE_GITEE" = "Xon" ]]; then
+    CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON"
+fi
+
 if [[ "X$LITE_ENABLE_AAR" = "Xon" ]]; then
    build_aar
 elif [[ "X$LITE_PLATFORM" != "X" ]]; then
--- a/mindspore/lite/examples/runtime_gpu_extend/src/custom_add_kernel_gpu.cc
+++ b/mindspore/lite/examples/runtime_gpu_extend/src/custom_add_kernel_gpu.cc
@ -38,33 +38,31 @@ class CustomAddKernelGpu : public kernel::Kernel {
  CustomAddKernelGpu(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
                     const schema::Primitive *primitive, const mindspore::Context *ctx,
                     const std::string &build_options, bool fp16_enable)
-      : Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {
-    opencl_runtime_ = new registry::opencl::OpenCLRuntimeWrapper();
-  }
+      : Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {}
  ~CustomAddKernelGpu() override { FreeWeight(); }
  // Prepare will be called during graph compilation
  int Prepare() override {
    const std::string kernel_name_ = "ElementAdd";
    const std::string program_name = "Arithmetic";
    std::string source = arithmetic_source;
-    if (opencl_runtime_->LoadSource(program_name, source) != kSuccess) {
+    if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
      std::cerr << "Load source failed.";
      return lite::RET_ERROR;
    }
    std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};

    build_options_ext.push_back(build_options_);
-    if (opencl_runtime_->BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
+    if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
      std::cerr << "Build kernel failed.";
      return lite::RET_ERROR;
    }

-    auto out_shape = custom_common::GpuTensorInfo(&outputs_[0], opencl_runtime_);
+    auto out_shape = custom_common::GpuTensorInfo(&outputs_[0], &opencl_runtime_);
    local_range_ = cl::NullRange;
    global_range_ = cl::NDRange(out_shape.width, out_shape.height);
    for (int i = 0; i < inputs_.size(); ++i) {
      auto &in_tensor = inputs_.at(i);
-      custom_common::GpuTensorInfo in_shape = custom_common::GpuTensorInfo(&in_tensor, opencl_runtime_);
+      custom_common::GpuTensorInfo in_shape = custom_common::GpuTensorInfo(&in_tensor, &opencl_runtime_);
      if (in_tensor.IsConst()) {
        std::vector<char> weight(in_shape.Image2DSize, 0);
        bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
@ -72,7 +70,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
                        in_tensor.DataType());
        DataType dtype =
          fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
-        auto allocator = opencl_runtime_->GetAllocator();
+        auto allocator = opencl_runtime_.GetAllocator();
        if (allocator == nullptr) {
          std::cerr << "GetAllocator fail.";
          FreeWeight();
@ -86,7 +84,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
        }
        weight_ptrs_.push_back(weight_ptr);
        // Use API to write GPU memory
-        if (opencl_runtime_->WriteImage(weight_ptr, weight.data()) != kSuccess) {
+        if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
          std::cerr << "WriteImage fail.";
          FreeWeight();
          return lite::RET_ERROR;
@ -98,7 +96,7 @@ class CustomAddKernelGpu : public kernel::Kernel {

    int arg_idx = 3;
    cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx << "failed.";
      FreeWeight();
      return lite::RET_ERROR;
@ -118,19 +116,19 @@ class CustomAddKernelGpu : public kernel::Kernel {
    auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
    auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
    int arg_idx = 0;
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
      return lite::RET_ERROR;
    }
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
      return lite::RET_ERROR;
    }
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
      return lite::RET_ERROR;
    }
-    if (opencl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
+    if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
      std::cerr << "Run kernel failed.";
      return lite::RET_ERROR;
    }
@ -192,7 +190,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
  cl::NDRange global_range_{cl::NullRange};
  cl::NDRange local_range_{cl::NullRange};
  std::vector<void *> weight_ptrs_;
-  registry::opencl::OpenCLRuntimeWrapper *opencl_runtime_;
+  registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;

  int PreProcess() {
    int ret = 0;
@ -202,7 +200,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
    }
    for (auto i = 0; i < outputs_.size(); ++i) {
      auto *output = &outputs_.at(i);
-      auto img_info = custom_common::GpuTensorInfo(output, opencl_runtime_);
+      auto img_info = custom_common::GpuTensorInfo(output, &opencl_runtime_);
      auto allocator = output->allocator();
      if (allocator == nullptr) {
        std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
@ -219,7 +217,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
  }

  void FreeWeight() {
-    auto allocator = opencl_runtime_->GetAllocator();
+    auto allocator = opencl_runtime_.GetAllocator();
    if (allocator == nullptr) {
      std::cerr << "GetAllocator fail.";
      return;
--- a/mindspore/lite/test/ut/src/registry/registry_gpu_custom_op_test.cc
+++ b/mindspore/lite/test/ut/src/registry/registry_gpu_custom_op_test.cc
@ -158,33 +158,31 @@ class CustomAddKernel : public kernel::Kernel {
  CustomAddKernel(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
                  const schema::Primitive *primitive, const mindspore::Context *ctx, const std::string &build_options,
                  bool fp16_enable)
-      : Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {
-    opencl_runtime_ = new registry::opencl::OpenCLRuntimeWrapper();
-  }
+      : Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {}
  ~CustomAddKernel() override { FreeWeight(); }
  // Prepare will be called during graph compilation
  int Prepare() override {
    const std::string kernel_name_ = "ElementAdd";
    const std::string program_name = "Arithmetic";
    std::string source = arithmetic_source;
-    if (opencl_runtime_->LoadSource(program_name, source) != kSuccess) {
+    if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
      std::cerr << "Load source failed.";
      return lite::RET_ERROR;
    }
    std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};

    build_options_ext.push_back(build_options_);
-    if (opencl_runtime_->BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
+    if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
      std::cerr << "Build kernel failed.";
      return lite::RET_ERROR;
    }

-    auto out_shape = GpuTensorInfo(&outputs_[0], opencl_runtime_);
+    auto out_shape = GpuTensorInfo(&outputs_[0], &opencl_runtime_);
    local_range_ = cl::NullRange;
    global_range_ = cl::NDRange(out_shape.width, out_shape.height);
    for (int i = 0; i < inputs_.size(); ++i) {
      auto &in_tensor = inputs_.at(i);
-      GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, opencl_runtime_);
+      GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, &opencl_runtime_);
      if (in_tensor.IsConst()) {
        std::vector<char> weight(in_shape.Image2DSize, 0);
        bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
@ -192,7 +190,7 @@ class CustomAddKernel : public kernel::Kernel {
                        in_tensor.DataType());
        DataType dtype =
          fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
-        auto allocator = opencl_runtime_->GetAllocator();
+        auto allocator = opencl_runtime_.GetAllocator();
        if (allocator == nullptr) {
          std::cerr << "GetAllocator fail.";
          FreeWeight();
@ -205,7 +203,7 @@ class CustomAddKernel : public kernel::Kernel {
          return lite::RET_ERROR;
        }
        weight_ptrs_.push_back(weight_ptr);
-        if (opencl_runtime_->WriteImage(weight_ptr, weight.data()) != kSuccess) {
+        if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
          std::cerr << "WriteImage fail.";
          FreeWeight();
          return lite::RET_ERROR;
@ -217,7 +215,7 @@ class CustomAddKernel : public kernel::Kernel {

    int arg_idx = 3;
    cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx << "failed.";
      FreeWeight();
      return lite::RET_ERROR;
@ -237,19 +235,19 @@ class CustomAddKernel : public kernel::Kernel {
    auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
    auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
    int arg_idx = 0;
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
      return lite::RET_ERROR;
    }
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
      return lite::RET_ERROR;
    }
-    if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
+    if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
      std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
      return lite::RET_ERROR;
    }
-    if (opencl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
+    if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
      std::cerr << "Run kernel failed.";
      return lite::RET_ERROR;
    }
@ -311,7 +309,7 @@ class CustomAddKernel : public kernel::Kernel {
  cl::NDRange global_range_{cl::NullRange};
  cl::NDRange local_range_{cl::NullRange};
  std::vector<void *> weight_ptrs_;
-  registry::opencl::OpenCLRuntimeWrapper *opencl_runtime_;
+  registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;

  int PreProcess() {
    int ret;
@ -321,7 +319,7 @@ class CustomAddKernel : public kernel::Kernel {
    }
    for (auto i = 0; i < outputs_.size(); ++i) {
      auto *output = &outputs_.at(i);
-      auto img_info = GpuTensorInfo(output, opencl_runtime_);
+      auto img_info = GpuTensorInfo(output, &opencl_runtime_);
      auto allocator = output->allocator();
      if (allocator == nullptr) {
        std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
@ -382,7 +380,7 @@ class CustomAddKernel : public kernel::Kernel {
  }

  void FreeWeight() {
-    auto allocator = opencl_runtime_->GetAllocator();
+    auto allocator = opencl_runtime_.GetAllocator();
    if (allocator == nullptr) {
      std::cerr << "GetAllocator fail.";
      return;