From 564bf717d5da5d8b94997a3c204bb451af335709 Mon Sep 17 00:00:00 2001
From: Corleone <liuchao195@huawei.com>
Date: Sat, 22 Aug 2020 16:42:51 +0800
Subject: [PATCH] add create program from binary interface for opencl

---
 .../kernel/opencl/kernel/arithmetic.cc        |   5 +-
 .../kernel/opencl/kernel/conv2d_transpose.cc  |   2 +-
 .../kernel/opencl/kernel/depthwise_conv2d.cc  |   2 +-
 .../runtime/kernel/opencl/kernel/matmul.cc    |   2 +-
 .../runtime/kernel/opencl/kernel/pooling2d.cc |   2 +-
 .../runtime/kernel/opencl/kernel/reshape.cc   |   2 +-
 .../runtime/kernel/opencl/kernel/softmax.cc   |   2 +-
 .../runtime/kernel/opencl/kernel/to_format.cc |   2 +-
 .../runtime/kernel/opencl/kernel/transpose.cc |   2 +-
 .../src/runtime/opencl/opencl_allocator.cc    |  12 +-
 .../src/runtime/opencl/opencl_allocator.h     |   5 +-
 .../lite/src/runtime/opencl/opencl_runtime.cc | 158 +++++++-----------
 .../lite/src/runtime/opencl/opencl_runtime.h  |  74 ++++----
 .../lite/src/runtime/opencl/opencl_wrapper.cc |   9 +
 14 files changed, 132 insertions(+), 147 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
index 7fb0a15a58a..8fb4a2d52b1 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
@@ -103,10 +103,7 @@ int ArithmeticOpenCLKernel::Init() {
 
   lite::STATUS error_code = RET_OK;
 #ifdef PROGRAM_WITH_IL
-  bool ret = runtime_->CreateKernelFromIL(kernel_(), kernel_name);
-  if (!ret) {
-    error_code = RET_ERROR;
-  }
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   if (out_mem_type_ == OpenCLMemType::IMG) {
     kernel_name += "_IMG";
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
index f14a00103c8..dcf776efe65 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
@@ -42,7 +42,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
   std::string kernel_name = "conv2d_transpose2x2";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   std::string source = conv2d_transpose2x2_source;
   std::set<std::string> build_options;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
index fbd146992c7..b6c2e05b013 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
@@ -64,7 +64,7 @@ int DepthwiseConv2dOpenCLKernel::Init() {
     kernel_name += "_1x1";
   }
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   std::string program_name = "DepthwiseConv2d";
   std::set<std::string> build_options;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
index 9593cc419d1..4dbfb318447 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
@@ -36,7 +36,7 @@ int MatMulOpenCLKernel::Init() {
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
 
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   std::set<std::string> build_options;
   std::string source = matmul_source;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
index 6e181c5ec28..65cac2c069c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
@@ -62,7 +62,7 @@ int PoolingOpenCLKernel::Init() {
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
 
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   if (out_mem_type_ == OpenCLMemType::BUF) {
     kernel_name += "_BUF";
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
index 6fc2f4ce313..db644507ca2 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
@@ -45,7 +45,7 @@ int ReshapeOpenCLKernel::Init() {
     return RET_ERROR;
   }
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   std::set<std::string> build_options;
   std::string source = reshape_source;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
index caa6a57f956..9422c08e600 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
@@ -107,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() {
     MS_LOG(EXCEPTION) << "Init `Softmax` kernel failed: Unsupported axis: " << parameter_->axis_;
   }
 #ifdef PROGRAM_WITH_IL
-  runtime_->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   if (!is_image_out_) {
     out_mem_type_ = OpenCLMemType::BUF;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
index 92c2c03479a..9e8361d2edc 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
@@ -50,7 +50,7 @@ int ToFormatOpenCLKernel::Init() {
 
   this->set_name(kernel_name);
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   std::set<std::string> build_options;
   std::string source = to_format_source;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
index 9c2df7bc7b2..e6b1d888c85 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
@@ -41,7 +41,7 @@ int TransposeOpenCLKernel::Init() {
     kernel_name += "_IMG";
   }
 #ifdef PROGRAM_WITH_IL
-  ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
+  kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
 #else
   std::set<std::string> build_options;
   std::string source = transpose_source;
diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
index e39dcdf2778..fc9e1caf151 100644
--- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
@@ -81,7 +81,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
   void *device_ptr = nullptr;
   void *image_ptr = nullptr;
 
-  if (svm_capabilities && svm_on_) {
+  if (svm_capabilities) {
     cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0;
     flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0;
     flags = flags | CL_MEM_READ_WRITE;
@@ -280,7 +280,7 @@ void OpenCLAllocator::Clear() {
 void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
   auto ocl_runtime = opencl::OpenCLRuntime::GetInstance();
   auto svm_capabilities = ocl_runtime->GetSVMCapabilities();
-  if (svm_capabilities && svm_on_) {
+  if (svm_capabilities) {
     if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
       auto it = allocated_list_.find(host_ptr);
       if (it == allocated_list_.end()) {
@@ -356,8 +356,8 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
   }
 }
 
-MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
-  MEM_TYPE mem_type{MEM_TYPE::BUF};
+MemType OpenCLAllocator::GetMemType(void *host_ptr) {
+  MemType mem_type{MemType::BUF};
   Lock();
   auto it = allocated_list_.find(host_ptr);
   if (it == allocated_list_.end()) {
@@ -367,9 +367,9 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
   }
   MemBuf *mem_buf = it->second;
   if (mem_buf->img_size.empty()) {
-    mem_type = MEM_TYPE::BUF;
+    mem_type = MemType::BUF;
   } else {
-    mem_type = MEM_TYPE::IMG;
+    mem_type = MemType::IMG;
   }
   UnLock();
   return mem_type;
diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h
index 87bf03cc5cd..b582e83bc6e 100644
--- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h
+++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h
@@ -40,7 +40,7 @@ struct OpenclMemory {
   OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER};
 };
 
-enum class MEM_TYPE : char { BUF, IMG };
+enum class MemType : char { SVM, BUF, IMG };
 
 class OpenCLAllocator : public Allocator {
  public:
@@ -58,7 +58,7 @@ class OpenCLAllocator : public Allocator {
   void *GetBuffer(void *host_ptr);
   void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true);
   int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
-  MEM_TYPE GetMemType(void *host_ptr);
+  MemType GetMemType(void *host_ptr);
   int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
   void *Prepare(void *ptr) override {
     if (ptr != nullptr) {
@@ -86,7 +86,6 @@ class OpenCLAllocator : public Allocator {
   // 6 is empirical value
   int shift_factor_ = 6;
   bool lock_flag_ = false;
-  bool svm_on_{false};
 };
 
 }  // namespace mindspore::lite::opencl
diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc
index af6ed559043..8a226bc4982 100644
--- a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc
@@ -138,27 +138,27 @@ int OpenCLRuntime::Init() {
                << max_work_item_sizes_[2];
 
   gpu_info_ = ParseGpuInfo(device_name, device_version);
-  cl_int err;
+  cl_int ret;
 #if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120)
   // create context from glcontext
   MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
   cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
                                           CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
-  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
+  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
 
-  if (err != CL_SUCCESS) {
+  if (ret != CL_SUCCESS) {
     MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
-    context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &err);
+    context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
   }
 #else
   MS_LOG(INFO) << "Create common opencl context";
   //  cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
   //                                          CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
   //  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
-  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &err);
+  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
 #endif
-  if (err != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err);
+  if (ret != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
 
@@ -170,9 +170,8 @@ int OpenCLRuntime::Init() {
   auto success = device_->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fp_config);
   support_fp16_ = CL_SUCCESS == success && fp_config > 0;
 
-  err = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_);
-  svm_capabilities_ = 0;
-  if (err != CL_SUCCESS || svm_capabilities_ == 0) {
+  ret = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_);
+  if (ret != CL_SUCCESS || svm_capabilities_ == 0) {
     svm_capabilities_ = 0;
     MS_LOG(INFO) << "SVM capalibilties: "
                  << "NONE";
@@ -204,16 +203,16 @@ int OpenCLRuntime::Init() {
   properties |= CL_QUEUE_PROFILING_ENABLE;
 #endif
 
-  default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &err);
-  if (err != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err);
+  default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
+  if (ret != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
 
   allocator_ = std::make_shared<OpenCLAllocator>();
 #ifdef PROGRAM_WITH_IL
   std::string flag = "";
-  CreateProgramFromIL(g_program_binary, flag);
+  binary_program_ = CreateProgramFromIL(g_program_binary, flag);
 #endif
   init_done_ = true;
   MS_LOG(INFO) << "OpenCLRuntime init done!";
@@ -317,7 +316,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
       MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
       return RET_ERROR;
     }
-    status = this->BuildProgram(build_options_str, &program);
+    status = this->BuildProgram(build_options_str, program);
     if (!status) {
       MS_LOG(ERROR) << program_name << " build failed!";
       return RET_ERROR;
@@ -325,53 +324,15 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
     program_map_.emplace(build_program_key, program);
   }
 
-  cl_int err;
-  kernel = cl::Kernel(program, kernel_name.c_str(), &err);
-  if (err != CL_SUCCESS) {
-    MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err);
+  cl_int ret;
+  kernel = cl::Kernel(program, kernel_name.c_str(), &ret);
+  if (ret != CL_SUCCESS) {
+    MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(ret);
     return RET_ERROR;
   }
   return RET_OK;
 }
 
-// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
-int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global,
-                             const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
-  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
-  }
-  MS_ASSERT(local.size() == 0 || local.size() == global.size());
-  std::vector<size_t> internal_global_ws = global;
-  for (size_t i = 0; i < local.size(); ++i) {
-    internal_global_ws[i] = ROUND_UP(global[i], local[i]);
-  }
-
-  MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size();
-  for (size_t i = 0; i < global.size(); i++) {
-    MS_LOG(DEBUG) << "global[" << i << "] = " << global[i];
-  }
-  for (size_t i = 0; i < local.size(); i++) {
-    MS_LOG(DEBUG) << "local[" << i << "] = " << local[i];
-  }
-
-  cl::Event event;
-  cl_int error = CL_SUCCESS;
-  if (local.size() == 0) {
-    error =
-      clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), nullptr, 0, nullptr, nullptr);
-  } else {
-    error = clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), local.data(), 0,
-                                   nullptr, nullptr);
-  }
-
-  if (error != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
-    return RET_ERROR;
-  }
-  MS_LOG(DEBUG) << "RunKernel success!";
-  return RET_OK;
-}
-
 // Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
 int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
                              const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
@@ -392,9 +353,6 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
     MS_LOG(DEBUG) << "local[" << i << "] = " << local[i];
   }
 
-  cl::Event event;
-  cl_int err = CL_SUCCESS;
-
   cl::NDRange global_range = cl::NullRange;
   cl::NDRange local_range = cl::NullRange;
   if (global.size() == 1) {
@@ -417,10 +375,12 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
     return RET_ERROR;
   }
 
-  err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
+  cl::Event event;
+  cl_int ret = CL_SUCCESS;
+  ret = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
 
-  if (err != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err);
+  if (ret != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(ret);
     return RET_ERROR;
   }
   MS_LOG(DEBUG) << "RunKernel success!";
@@ -486,11 +446,11 @@ bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *pr
 }
 
 // build program with build options
-bool OpenCLRuntime::BuildProgram(const std::string &build_options, cl::Program *program) {
-  cl_int ret = program->build({*device_}, build_options.c_str());
+bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Program &program) {
+  cl_int ret = program.build({*device_}, build_options.c_str());
   if (ret != CL_SUCCESS) {
-    if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
-      std::string build_log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
+    if (program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
+      std::string build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
       MS_LOG(ERROR) << "Program build log: " << build_log;
     }
     MS_LOG(ERROR) << "Build program failed: " << CLErrorCode(ret);
@@ -525,7 +485,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
   return cl_ret == CL_SUCCESS;
 }
 
-void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, cl::CommandQueue *command_queue,
+void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
                                bool sync) const {
   if (command_queue == nullptr) {
     command_queue = default_command_queue_.get();
@@ -534,7 +494,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size,
 }
 
 int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
-  if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
+  if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
     return RET_OK;
   }
   if (command_queue == nullptr) {
@@ -543,7 +503,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
   return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
 }
 
-void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
+void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
                                cl::CommandQueue *command_queue) const {
   if (command_queue == nullptr) {
     command_queue = default_command_queue_.get();
@@ -555,7 +515,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, c
   return command_queue->enqueueMapImage(buffer, sync, flags, origin_, region_, &row_pitch, &slice_pitch);
 }
 
-int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
+int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
   if (command_queue == nullptr) {
     command_queue = default_command_queue_.get();
   }
@@ -563,7 +523,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm
 }
 
 int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const {
-  if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
+  if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
     return RET_OK;
   }
   if (command_queue == nullptr) {
@@ -586,43 +546,55 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
 
 int OpenCLRuntime::GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id) {
   size_t max_work_group_size;
-  cl_int err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
+  cl_int ret = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
                                         &max_work_group_size, nullptr);
-  if (err != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(err);
+  if (ret != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(ret);
   }
   return static_cast<int>(max_work_group_size);
 }
 
-bool OpenCLRuntime::CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name) {
+cl::Kernel OpenCLRuntime::GetKernelFromBinary(const std::string &kernel_name) {
   cl_int ret = CL_SUCCESS;
-  kernel = clCreateKernel(il_program_, kernel_name.c_str(), &ret);
+  cl::Kernel kernel = cl::Kernel(binary_program_, kernel_name.c_str(), &ret);
   if (ret != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Create kernel with IL failed: " << CLErrorCode(ret);
+    MS_LOG(ERROR) << "Create kernel with binary program failed: " << CLErrorCode(ret);
   }
-  return ret == CL_SUCCESS;
+  return kernel;
 }
 
 // build program with IL
-bool OpenCLRuntime::CreateProgramFromIL(const std::vector<u_char> program_binary, const std::string flag) {
+cl::Program OpenCLRuntime::CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag) {
 #if CL_HPP_TARGET_OPENCL_VERSION >= 210
-  size_t program_length = program_binary.size();
-  cl_int ret = CL_SUCCESS;
-  il_program_ = clCreateProgramWithIL((*context_)(), program_binary.data(), program_length, &ret);
-  if (ret != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Create program with IL failed: " << CLErrorCode(ret);
-    return false;
+  cl::Program program = cl::Program(*context_, binary);
+  bool status = BuildProgram(default_build_opts_, program);
+  if (!status) {
+    MS_LOG(ERROR) << "Build program with IL failed!";
   }
-
-  ret = clBuildProgram(il_program_, 1, &(*device_)(), flag.c_str(), NULL, NULL);
-  if (ret != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Build program with IL failed: " << CLErrorCode(ret);
-  }
-  return ret == CL_SUCCESS;
+  return program;
 #else
   MS_LOG(ERROR) << "Create program with IL failed! The compute capabitity of device should be 2.1 and higher.";
-  return false;
+  return cl::Program();
 #endif
 }
 
+// build program with binary
+cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary,
+                                                   const std::string &flag) {
+  cl::Program program = cl::Program(*context_, {*device_}, binary);
+  bool status = BuildProgram(default_build_opts_, program);
+  if (!status) {
+    MS_LOG(ERROR) << "Build program with binary failed!";
+  }
+  return program;
+}
+
+std::vector<std::vector<unsigned char>> OpenCLRuntime::GetProgramBinaries(const cl::Program &program) {
+  cl_int ret = CL_SUCCESS;
+  auto binary = program.getInfo<CL_PROGRAM_BINARIES>(&ret);
+  if (ret != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Get program binary failed: " << CLErrorCode(ret);
+  }
+  return binary;
+}
 }  // namespace mindspore::lite::opencl
diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/opencl/opencl_runtime.h
index bcf066e15cc..a4b18690279 100644
--- a/mindspore/lite/src/runtime/opencl/opencl_runtime.h
+++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.h
@@ -65,61 +65,68 @@ class OpenCLRuntime {
   GpuInfo GetGpuInfo();
   bool GetFp16Enable() const;
   bool SetFp16Enable(bool enable);
-  const std::vector<size_t> &GetWorkItemSize() { return max_work_item_sizes_; }
-  uint32_t GetImagePitchAlignment() { return image_pitch_align_; }
-  cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; }
+  bool GetSVMEnable() const { return svm_enable_; }
+  void SetSVMEnable(bool enable) { svm_enable_ = enable; }
+  const std::vector<size_t> &GetWorkItemSize() const { return max_work_item_sizes_; }
+  uint32_t GetImagePitchAlignment() const { return image_pitch_align_; }
+  cl_device_svm_capabilities GetSVMCapabilities() const { return svm_enable_ ? svm_capabilities_ : 0; }
 
   template <typename T>
-  typename std::enable_if<std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index,
-                                                                                const T value) {
-    if (svm_capabilities_) {
-      MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
-      return clSetKernelArgSVMPointer(kernel, index, value);
-    } else {
-      MEM_TYPE mem_type = allocator_->GetMemType(value);
-      if (mem_type == MEM_TYPE::BUF) {
+  typename std::enable_if<std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl::Kernel &kernel, uint32_t index,
+                                                                                const T value,
+                                                                                const MemType mem_type = MemType::IMG) {
+    switch (mem_type) {
+      case MemType::SVM: {
+        MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
+        return kernel.setArg(index, value);
+      }
+      case MemType::BUF: {
         cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
         MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
-        return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)());
-      } else {
-        cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
-        MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
-        return clSetKernelArg(kernel, index, sizeof((*image)()), &(*image)());
+        return kernel.setArg(index, *buffer);
       }
+      case MemType::IMG: {
+        cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
+        if (image == nullptr) {
+          MS_LOG(WARNING) << "Can't get Image2D, try to use Buffer. Please confirm the buffer type.";
+          cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
+          MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
+          return kernel.setArg(index, *buffer);
+        }
+        MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
+        return kernel.setArg(index, *image);
+      }
+      default:
+        MS_LOG(ERROR) << "Unsupport opencl memory type: " << static_cast<int>(mem_type);
     }
   }
 
   template <typename T>
-  typename std::enable_if<!std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index,
-                                                                                 const T value) {
-    return clSetKernelArg(kernel, index, sizeof(value), &value);
+  typename std::enable_if<!std::is_pointer<T>::value, cl_int>::type SetKernelArg(
+    cl::Kernel &kernel, uint32_t index, const T value, const MemType mem_type = MemType::IMG) {
+    return kernel.setArg(index, value);
   }
 
-  template <typename T>
-  int SetKernelArg(cl::Kernel &kernel, uint32_t index, const T &value) {
-    return SetKernelArg(kernel(), index, value);
-  }
-
-  bool CreateProgramFromIL(const std::vector<u_char> program_binary, const std::string flag);
-  bool CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name);
+  cl::Program CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag);
+  cl::Program CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary, const std::string &flag);
+  cl::Kernel GetKernelFromBinary(const std::string &kernel_name);
+  std::vector<std::vector<unsigned char>> GetProgramBinaries(const cl::Program &program);
   bool LoadSource(const std::string &program_name, const std::string &source);
   int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name,
                   const std::set<std::string> &build_options);
-  int RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global, const std::vector<size_t> &local,
-                cl::CommandQueue *command_queue);
   int RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global, const std::vector<size_t> &local,
                 cl::CommandQueue *command_queue);
   bool CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr,
                            bool sync = false) const;
   bool CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr,
                            bool sync = false) const;
-  void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
+  void *MapBuffer(const cl::Buffer &buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
                   bool sync = false) const;
-  void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
+  void *MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
                   cl::CommandQueue *command_queue = nullptr) const;
   int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
                 bool sync = false) const;
-  int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
+  int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
   int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
   bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr);
 
@@ -136,7 +143,7 @@ class OpenCLRuntime {
   GpuInfo ParseGpuInfo(std::string device_name, std::string device_version);
 
   bool LoadProgram(const std::string &program_name, cl::Program *program);
-  bool BuildProgram(const std::string &build_options, cl::Program *program);
+  bool BuildProgram(const std::string &build_options, const cl::Program &program);
 
  private:
   static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
@@ -146,7 +153,7 @@ class OpenCLRuntime {
   std::shared_ptr<cl::Device> device_{nullptr};
   std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
   std::map<std::string, cl::Program> program_map_{};
-  cl_program il_program_{0};
+  cl::Program binary_program_{0};
   uint64_t global_memery_cachesize_{0};
   int max_work_group_size;
   uint32_t compute_units_{0};
@@ -155,6 +162,7 @@ class OpenCLRuntime {
   GpuInfo gpu_info_;
   bool support_fp16_{false};
   bool fp16_enable_{false};
+  bool svm_enable_{false};
   cl_device_svm_capabilities svm_capabilities_{0};
   cl_uint image_pitch_align_{0};
   std::vector<size_t> max_work_item_sizes_;
diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc
index b97f0259eda..f6f0b8bd967 100644
--- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc
@@ -273,6 +273,15 @@ cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const ch
   return func(context, count, strings, lengths, errcode_ret);
 }
 
+// clCreateProgramWithBinary wrapper, use OpenCLWrapper function.
+cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *devices_list,
+                                     const size_t *lengths, const unsigned char **binaries, cl_int *binary_status,
+                                     cl_int *errcode_ret) {
+  auto func = mindspore::lite::opencl::OpenCLWrapper::GetInstance()->clCreateProgramWithBinary;
+  MS_ASSERT(func != nullptr);
+  return func(context, num_devices, devices_list, lengths, binaries, binary_status, errcode_ret);
+}
+
 // clGetProgramInfo wrapper, use OpenCLWrapper function.
 cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value,
                         size_t *param_value_size_ret) {