!5123 fixed segmentation fault for opencl mode

Merge pull request !5123 from liuchao/master
2020-08-25 17:07:23 +08:00 · 2020-08-25 17:07:23 +08:00 · 4efa20a137
parent a2049fa0c5 3cb849c441
commit 4efa20a137
20 changed files with 56 additions and 109 deletions
--- a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc
@@ -35,39 +35,17 @@ using mindspore::kernel::CLErrorCode;

 namespace mindspore::lite::opencl {

-std::map<std::string, std::string> g_opencl_program_map;
-
+static std::map<std::string, std::string> g_opencl_program_map;
 static std::mutex g_mtx;
 static std::mutex g_init_mtx;

-// magic number
-static std::map<int, int> AdrenoSubGroup{
-  {640, 128}, {630, 128}, {616, 128}, {612, 64}, {610, 64}, {540, 32}, {530, 32},
-  {512, 32},  {510, 32},  {509, 32},  {506, 32}, {505, 32}, {405, 32}, {330, 16},
-};
-
-#ifdef USE_OPENCL_WRAPPER
-std::shared_ptr<OpenCLWrapper> OpenCLWrapper::opencl_wrapper_singleton_ = nullptr;
-#endif
-std::shared_ptr<OpenCLRuntime> OpenCLRuntime::opencl_runtime_singleton_ = nullptr;
 bool OpenCLRuntime::init_done_ = false;

 OpenCLRuntime *OpenCLRuntime::GetInstance() {
  std::unique_lock<std::mutex> lck(g_mtx);
-  if (opencl_runtime_singleton_.get() == nullptr) {
-    opencl_runtime_singleton_.reset(new OpenCLRuntime());
-    opencl_runtime_singleton_->Init();
-  }
-  return opencl_runtime_singleton_.get();
-}
-
-void OpenCLRuntime::DeleteInstance() {
-  std::unique_lock<std::mutex> lck(g_mtx);
-  init_done_ = false;
-  if (opencl_runtime_singleton_ != nullptr) {
-    opencl_runtime_singleton_.reset();
-    opencl_runtime_singleton_ = nullptr;
-  }
+  static OpenCLRuntime ocl_runtime;
+  ocl_runtime.Init();
+  return &ocl_runtime;
 }

 OpenCLRuntime::OpenCLRuntime() { default_build_opts_ = " -cl-mad-enable -cl-fast-relaxed-math -Werror"; }
@@ -88,7 +66,7 @@ int OpenCLRuntime::Init() {
  MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;

 #ifdef USE_OPENCL_WRAPPER
-  if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
+  if (OpenCLWrapper::GetInstance()->LoadOpenCLLibrary() == false) {
    MS_LOG(ERROR) << "Load OpenCL symbols failed!";
    return RET_ERROR;
  }
@@ -123,7 +101,11 @@ int OpenCLRuntime::Init() {
    return RET_ERROR;
  }

-  device_ = std::make_shared<cl::Device>();
+  device_ = new (std::nothrow) cl::Device();
+  if (device_ == nullptr) {
+    MS_LOG(ERROR) << "Create OpenCL device failed!";
+    return RET_ERROR;
+  }
  *device_ = devices[0];
  max_work_item_sizes_ = device_->getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
  const std::string device_name = device_->getInfo<CL_DEVICE_NAME>();
@@ -144,20 +126,21 @@ int OpenCLRuntime::Init() {
  MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
  cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
                                          CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
-  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
+  context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);

-  if (ret != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
-    context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
+  if (ret != CL_SUCCESS || context_ == nullptr) {
+    MS_LOG(ERROR) << "Create special OpenCL context failed, Create common OpenCL context then.";
+    context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
+    if (context_ == nullptr) {
+      MS_LOG(ERROR) << "Create OpenCL context failed!";
+      return RET_ERROR;
+    }
  }
 #else
  MS_LOG(INFO) << "Create common opencl context";
-  //  cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
-  //                                          CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
-  //  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
-  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
+  context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
 #endif
-  if (ret != CL_SUCCESS) {
+  if (ret != CL_SUCCESS || context_ == nullptr) {
    MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
    return RET_ERROR;
  }
@@ -203,13 +186,17 @@ int OpenCLRuntime::Init() {
  properties |= CL_QUEUE_PROFILING_ENABLE;
 #endif

-  default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
-  if (ret != CL_SUCCESS) {
+  default_command_queue_ = new (std::nothrow) cl::CommandQueue(*context_, *device_, properties, &ret);
+  if (ret != CL_SUCCESS || default_command_queue_ == nullptr) {
    MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
    return RET_ERROR;
  }

-  allocator_ = std::make_shared<OpenCLAllocator>();
+  allocator_ = new (std::nothrow) OpenCLAllocator();
+  if (allocator_ == nullptr) {
+    MS_LOG(ERROR) << "Command OpenCL allocator failed!";
+    return RET_ERROR;
+  }
 #ifdef PROGRAM_WITH_IL
  std::string flag = "";
  binary_program_ = CreateProgramFromIL(g_program_binary, flag);
@@ -221,17 +208,18 @@ int OpenCLRuntime::Init() {
 }

 OpenCLRuntime::~OpenCLRuntime() {
+  init_done_ = false;
  program_map_.clear();
-  // allocator_->Clear();
-  allocator_.reset();
-  default_command_queue_.reset();
-  context_.reset();
-  device_.reset();
+  delete allocator_;
+  delete default_command_queue_;
+  delete context_;
+  delete device_;
+  OpenCLWrapper::GetInstance()->UnLoadOpenCLLibrary();
 }

-cl::Context *OpenCLRuntime::Context() { return context_.get(); }
+cl::Context *OpenCLRuntime::Context() { return context_; }

-cl::Device *OpenCLRuntime::Device() { return device_.get(); }
+cl::Device *OpenCLRuntime::Device() { return device_; }

 uint64_t OpenCLRuntime::DeviceGlobalMemoryCacheSize() const { return global_memery_cachesize_; }

@@ -262,9 +250,7 @@ uint32_t OpenCLRuntime::GetSubGroupSize(const cl::Kernel &kernel, const cl::NDRa
      sub_group_size = 0;
    }
 #else
-    if (AdrenoSubGroup.find(gpu_info_.model_num) != AdrenoSubGroup.end()) {
-      sub_group_size = AdrenoSubGroup[gpu_info_.model_num];
-    }
+    sub_group_size = 0;
 #endif
  }

@@ -337,7 +323,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
 int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
                             const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  MS_ASSERT(local.size() == 0 || local.size() == global.size());
  std::vector<size_t> internal_global_ws = global;
@@ -462,7 +448,7 @@ bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Pro
 bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
                                        bool sync) const {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  cl_int cl_ret = CL_SUCCESS;
  const cl::Buffer *buffer = static_cast<const cl::Buffer *>(src);
@@ -475,7 +461,7 @@ bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size,
 bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
                                        bool sync) const {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  cl_int cl_ret = CL_SUCCESS;
  const cl::Buffer *buffer = static_cast<const cl::Buffer *>(dst);
@@ -488,7 +474,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
 void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
                               bool sync) const {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  return command_queue->enqueueMapBuffer(buffer, sync, flags, 0, size);
 }
@@ -498,7 +484,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
    return RET_OK;
  }
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
 }
@@ -506,7 +492,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
 void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
                               cl::CommandQueue *command_queue) const {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  cl::size_type row_pitch;
  cl::size_type slice_pitch;
@@ -517,7 +503,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags,

 int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  return command_queue->enqueueUnmapMemObject(buffer, host_ptr);
 }
@@ -527,14 +513,14 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
    return RET_OK;
  }
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  return command_queue->enqueueUnmapSVM(host_ptr);
 }

 bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
  if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
  }
  cl_int ret = command_queue->finish();
  if (ret != CL_SUCCESS) {
--- a/mindspore/lite/src/runtime/opencl/opencl_runtime.h
+++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.h
@@ -20,7 +20,6 @@ j* you may not use this file except in compliance with the License.
 #include <vector>
 #include <map>
 #include <memory>
-#include <mutex>
 #include <set>
 #include <string>
 #include <type_traits>
@@ -38,9 +37,6 @@ struct GpuInfo {
  float opencl_version = 0;
 };

-// Base GPU cache size used for computing local work group size.
-const int32_t g_base_gpu_mem_cachesize = 16384;
-
 class OpenCLRuntime {
 public:
  static OpenCLRuntime *GetInstance();
@@ -54,8 +50,8 @@ class OpenCLRuntime {

  cl::Context *Context();
  cl::Device *Device();
-  OpenCLAllocator *GetAllocator() { return allocator_.get(); }
-  cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_.get(); }
+  OpenCLAllocator *GetAllocator() { return allocator_; }
+  cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_; }
  uint64_t DeviceGlobalMemoryCacheSize() const;
  int DeviceMaxWorkGroupSize() const;
  uint32_t DeviceComputeUnits() const;
@@ -146,13 +142,12 @@ class OpenCLRuntime {
  bool BuildProgram(const std::string &build_options, const cl::Program &program);

 private:
-  static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
  static bool init_done_;
-  std::shared_ptr<cl::CommandQueue> default_command_queue_{nullptr};
-  std::shared_ptr<cl::Context> context_{nullptr};
-  std::shared_ptr<cl::Device> device_{nullptr};
-  std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
-  std::map<std::string, cl::Program> program_map_{};
+  cl::CommandQueue *default_command_queue_{nullptr};
+  cl::Context *context_{nullptr};
+  cl::Device *device_{nullptr};
+  OpenCLAllocator *allocator_{nullptr};
+  std::map<std::string, cl::Program> program_map_;
  cl::Program binary_program_{0};
  uint64_t global_memery_cachesize_{0};
  int max_work_group_size;
@@ -169,5 +164,4 @@ class OpenCLRuntime {
 };

 }  // namespace mindspore::lite::opencl
-
 #endif  // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_
--- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc
@@ -66,19 +66,13 @@ static const std::vector<std::string> g_opencl_library_paths = {
 };

 OpenCLWrapper *OpenCLWrapper::GetInstance() {
-  static std::once_flag opencl_wrapper_once;
-  std::call_once(opencl_wrapper_once,
-                 []() { opencl_wrapper_singleton_ = std::shared_ptr<OpenCLWrapper>(new OpenCLWrapper()); });
-
-  return opencl_wrapper_singleton_.get();
+  static OpenCLWrapper ocl_wrapper;
+  return &ocl_wrapper;
 }

 OpenCLWrapper::OpenCLWrapper() {}

-OpenCLWrapper::~OpenCLWrapper() {
-  if (nullptr == opencl_wrapper_singleton_.get()) return;
-  opencl_wrapper_singleton_->UnLoadOpenCLLibrary();
-}
+OpenCLWrapper::~OpenCLWrapper() {}

 // load default library path
 bool OpenCLWrapper::LoadOpenCLLibrary() {
--- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.h
+++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.h
@@ -230,8 +230,7 @@ class OpenCLWrapper {
  bool LoadLibraryFromPath(const std::string &path);

 private:
-  static std::shared_ptr<OpenCLWrapper> opencl_wrapper_singleton_;
-  void *handle_ = nullptr;
+  void *handle_{nullptr};
 };

 }  // namespace mindspore::lite::opencl
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
@@ -173,7 +173,6 @@ TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
  delete input_tensor;
  delete output_tensor;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
@@ -276,7 +275,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
  delete input_tensor;
  delete output_tensor;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
@@ -379,7 +377,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
  delete input_tensor;
  delete output_tensor;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
@@ -483,6 +480,5 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
  delete input_tensor;
  delete output_tensor;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
@@ -202,7 +202,6 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
  for (auto tensor : outputs) {
    delete tensor;
  }
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 class TestArithmeticOpenCL : public mindspore::CommonTest {
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc
@@ -143,7 +143,6 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
  delete pooling_kernel;
  delete pGraph;
  delete param;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
@@ -155,6 +155,5 @@ TEST_F(TestBatchnormOpenCL, Batchnorminput_dim4) {
  delete param;
  delete batchnorm_kernel;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
@@ -213,6 +213,5 @@ TEST_F(TestConcatOpenCL, ConcatFp32_2input_dim4_axis3) {
  delete param;
  delete concat_kernel;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc
@@ -175,7 +175,6 @@ void RunTestCase(const std::vector<int> shape, const std::vector<std::string> fi
  inputs[0]->SetData(nullptr);
  outputs[0]->SetData(nullptr);
  MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
  int pad = 0;
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc
@@ -136,7 +136,6 @@ void TEST_MAIN(schema::Format input_format, schema::Format output_format, const
  bias_tensor.SetData(nullptr);
  delete param;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionOpenCL, in1x1x64x512_out1x1x64x7358_k11_s11_p0000) {
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc
@@ -195,7 +195,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) {
                      2.2294958, 1.6570128, 2.465089,  1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};

  DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
@@ -268,7 +267,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
                      1.0517888,  0.59817517, 0.75649744, 1.2075498,  0.38804203};

  DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
@@ -314,7 +312,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
                      2.2294958, 1.6570128, 2.465089,  1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};

  DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
@@ -387,7 +384,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
                      1.0517888,  0.59817517, 0.75649744, 1.2075498,  0.38804203};

  DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
@@ -512,7 +508,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
  inputs[1]->SetData(nullptr);
  inputs[2]->SetData(nullptr);
  MS_LOG(INFO) << "TestConvolutionDwNoPadFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
@@ -673,7 +668,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
  inputs[1]->SetData(nullptr);
  inputs[2]->SetData(nullptr);
  MS_LOG(INFO) << "TestConvolutionDwPadFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
@@ -739,7 +733,6 @@ TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
      DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), nullptr, schema::Format_NHWC4, false);
    }
  }
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
@@ -788,6 +781,5 @@ TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
  }
  //      DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, true);
  DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), gnd_data.get(), schema::Format_NHWC4, true);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
@@ -115,7 +115,6 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
  CompareOutputData(output_data, correct_data, co, 0.0001);
  tensor_x->SetData(nullptr);
  tensor_out->SetData(nullptr);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
  MS_LOG(INFO) << "TestMatMulFp32 passed";
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc
@@ -118,7 +118,6 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
  }
  delete pooling_kernel;
  delete pGraph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc
@@ -183,6 +183,5 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
  delete param;
  delete prelu_kernel;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc
@@ -104,7 +104,6 @@ TEST_F(TestReshapeOpenCL, ReshapeFp32) {

  inputs[0]->SetData(nullptr);
  outputs[0]->SetData(nullptr);
-  lite::opencl::OpenCLRuntime::DeleteInstance();

  MS_LOG(INFO) << "Test ReshapeFp32 passed";
 }
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
@@ -144,6 +144,5 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
  }
  delete slice_kernel;
  delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc
@@ -92,7 +92,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
  }
  delete kernel;
  delete pGraph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }

 TEST_F(TestSoftmaxOpenCL, Softmax_1) {
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc
@@ -103,6 +103,5 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) {
  // compare
  CompareOutputData(output_data, correct_data, h * w * c, 0.00001);
  MS_LOG(INFO) << "Test TransposeFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc
@@ -106,7 +106,6 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {

  inputs[0]->SetData(nullptr);
  outputs[0]->SetData(nullptr);
-  lite::opencl::OpenCLRuntime::DeleteInstance();

  MS_LOG(INFO) << "Test TransposeFp32 passed";
 }