From c018938e9a675ff69d6b5ae734db9682dcefff4d Mon Sep 17 00:00:00 2001 From: Corleone Date: Wed, 16 Sep 2020 22:49:15 +0800 Subject: [PATCH] fixed opencl program stuck for some device --- .../kernel/opencl/kernel/activation.cc | 18 ++--- .../runtime/kernel/opencl/kernel/activation.h | 1 - .../kernel/opencl/kernel/arithmetic.cc | 23 +++--- .../runtime/kernel/opencl/kernel/arithmetic.h | 2 - .../kernel/opencl/kernel/arithmetic_self.cc | 20 ++--- .../kernel/opencl/kernel/arithmetic_self.h | 1 - .../runtime/kernel/opencl/kernel/batchnorm.cc | 30 +++---- .../runtime/kernel/opencl/kernel/batchnorm.h | 1 - .../runtime/kernel/opencl/kernel/biasadd.cc | 24 +++--- .../runtime/kernel/opencl/kernel/biasadd.h | 1 - .../runtime/kernel/opencl/kernel/concat.cc | 76 +++++++++--------- .../src/runtime/kernel/opencl/kernel/concat.h | 1 - .../kernel/opencl/kernel/conv2d_transpose.cc | 35 ++++---- .../kernel/opencl/kernel/conv2d_transpose.h | 1 - .../kernel/opencl/kernel/convolution.cc | 80 +++++++++---------- .../kernel/opencl/kernel/convolution.h | 1 - .../kernel/opencl/kernel/depthwise_conv2d.cc | 42 +++++----- .../kernel/opencl/kernel/depthwise_conv2d.h | 1 - .../runtime/kernel/opencl/kernel/gather.cc | 26 +++--- .../src/runtime/kernel/opencl/kernel/gather.h | 1 - .../runtime/kernel/opencl/kernel/matmul.cc | 29 +++---- .../src/runtime/kernel/opencl/kernel/matmul.h | 1 - .../runtime/kernel/opencl/kernel/pooling2d.cc | 30 +++---- .../runtime/kernel/opencl/kernel/pooling2d.h | 1 - .../src/runtime/kernel/opencl/kernel/prelu.cc | 25 +++--- .../src/runtime/kernel/opencl/kernel/prelu.h | 1 - .../runtime/kernel/opencl/kernel/reduce.cc | 19 ++--- .../src/runtime/kernel/opencl/kernel/reduce.h | 1 - .../runtime/kernel/opencl/kernel/reshape.cc | 21 +++-- .../runtime/kernel/opencl/kernel/reshape.h | 1 - .../src/runtime/kernel/opencl/kernel/scale.cc | 1 - .../src/runtime/kernel/opencl/kernel/scale.h | 2 - .../src/runtime/kernel/opencl/kernel/slice.cc | 26 +++--- .../src/runtime/kernel/opencl/kernel/slice.h | 1 - .../runtime/kernel/opencl/kernel/softmax.cc | 32 ++++---- .../runtime/kernel/opencl/kernel/softmax.h | 2 - .../runtime/kernel/opencl/kernel/to_format.cc | 21 +++-- .../runtime/kernel/opencl/kernel/to_format.h | 1 - .../runtime/kernel/opencl/kernel/transpose.cc | 27 +++---- .../runtime/kernel/opencl/kernel/transpose.h | 1 - .../src/runtime/kernel/opencl/opencl_kernel.h | 13 ++- .../kernel/opencl/subgraph_opencl_kernel.cc | 22 +++-- .../kernel/opencl/subgraph_opencl_kernel.h | 1 + .../src/runtime/opencl/opencl_allocator.cc | 17 ---- .../src/runtime/opencl/opencl_allocator.h | 1 - .../lite/src/runtime/opencl/opencl_executor.h | 6 +- mindspore/lite/src/scheduler.cc | 2 +- .../runtime/kernel/opencl/activation_tests.cc | 15 ---- .../kernel/opencl/arithmetic_self_tests.cc | 1 - .../runtime/kernel/opencl/arithmetic_tests.cc | 1 - .../runtime/kernel/opencl/batchnorm_tests.cc | 1 - .../runtime/kernel/opencl/biasadd_tests.cc | 3 - .../src/runtime/kernel/opencl/concat_tests.cc | 2 - .../src/runtime/kernel/opencl/gather_tests.cc | 1 - .../src/runtime/kernel/opencl/prelu_tests.cc | 3 - .../src/runtime/kernel/opencl/scale_tests.cc | 1 - .../src/runtime/kernel/opencl/slice_tests.cc | 2 - 57 files changed, 306 insertions(+), 414 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index 66e6e90ee78..b36842f30a5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -55,8 +55,7 @@ int ActivationOpenClKernel::Init() { c = in_tensors_[0]->shape()[3]; } nhwc_shape_ = {n, h, w, c}; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); if (in_size_ != 2 && in_size_ != 4) { MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_; @@ -75,9 +74,9 @@ int ActivationOpenClKernel::Init() { std::string source = activation_source; std::set build_options; - ocl_runtime->LoadSource(Program_Kernel[type_][0], source); + ocl_runtime_->LoadSource(Program_Kernel[type_][0], source); std::string kernel_name = Program_Kernel[type_][1]; - ocl_runtime->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options); in_ori_format_ = in_tensors_[0]->GetFormat(); out_ori_format_ = out_tensors_[0]->GetFormat(); in_tensors_[0]->SetFormat(op_format_); @@ -89,17 +88,16 @@ int ActivationOpenClKernel::Init() { int ActivationOpenClKernel::Run() { MS_LOG(DEBUG) << op_parameter_->name_ << " begin running!"; cl_int4 img2d_shape = GetImg2dShape(); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, img2d_shape); if (type_ == ActivationType_LEAKY_RELU) { - ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_); } std::vector local = {}; std::vector global = {static_cast(img2d_shape.s[1]), static_cast(img2d_shape.s[2])}; - auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); + auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel:" << op_parameter_->name_ << " fail."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h index 58734a314aa..6cc25edb6d9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h @@ -19,7 +19,6 @@ #include -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/fp32/activation.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index ce78dd3a7ba..59a520d67bb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -34,7 +34,7 @@ namespace mindspore::kernel { ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() { if (weight_ptr_ != nullptr) { - auto allocator = runtime_->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); allocator->Free(weight_ptr_); weight_ptr_ = nullptr; } @@ -106,7 +106,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { const ArithmeticParameter *arithmetic_parameter = reinterpret_cast(op_parameter_); if (!arithmetic_parameter->broadcasting_) { if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { - auto allocator = runtime_->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); std::vector img_size; GetImageSize(0, &img_size); int pack_weight_size = in_tensors_[1]->ElementsC4Num(); @@ -194,7 +194,6 @@ int ArithmeticOpenCLKernel::InitBuffer() { } int ArithmeticOpenCLKernel::Init() { - runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); std::string kernel_name; const ArithmeticParameter *arithmetic_parameter = reinterpret_cast(op_parameter_); @@ -265,7 +264,7 @@ int ArithmeticOpenCLKernel::Init() { lite::STATUS error_code = RET_OK; #ifdef PROGRAM_WITH_IL - kernel_ = runtime_->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else if (out_mem_type_ == OpenCLMemType::IMG) { kernel_name += "_IMG"; @@ -275,8 +274,8 @@ int ArithmeticOpenCLKernel::Init() { std::string program_name = "Arithmetic"; std::set build_options; std::string source = arithmetic_source; - runtime_->LoadSource(program_name, source); - error_code = runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif if (error_code != RET_OK) { return error_code; @@ -302,10 +301,10 @@ int ArithmeticOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (element_flag_) { void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_; - runtime_->SetKernelArg(kernel_, arg_idx++, weight); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight); } else { float weight = 0.f; if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { @@ -316,9 +315,9 @@ int ArithmeticOpenCLKernel::Run() { MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); return RET_ERROR; } - runtime_->SetKernelArg(kernel_, arg_idx++, weight); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight); } - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); int H = 0; int W = 0; @@ -336,8 +335,8 @@ int ArithmeticOpenCLKernel::Run() { return RET_ERROR; } cl_int2 output_shape{W, H}; - runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); - runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h index e926f930e29..650c67d88f0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h @@ -19,7 +19,6 @@ #include #include "src/runtime/kernel/arm/fp32/arithmetic.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::kernel { @@ -42,7 +41,6 @@ class ArithmeticOpenCLKernel : public OpenCLKernel { int InitBuffer(); cl::Kernel kernel_; - lite::opencl::OpenCLRuntime *runtime_; bool element_flag_{true}; void *weight_ptr_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 2f5003d6963..4473fc5040d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -17,7 +17,6 @@ #include #include #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/arithmetic_self.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/cl/arithmeticself.cl.inc" @@ -51,8 +50,7 @@ int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector *im im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto enable_fp16_ = ocl_runtime->GetFp16Enable(); + auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; } @@ -136,9 +134,8 @@ int ArithmeticSelfOpenCLKernel::Init() { std::set build_options; std::string source = arithmeticself_source; std::string program_name = "ArithmeticSelf"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); return RET_OK; } @@ -162,7 +159,6 @@ void ArithmeticSelfGetWorkGroup(const std::vector &global, std::vectorname() << " Running! "; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto output_shape = out_tensors_[0]->shape(); cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)}; @@ -170,17 +166,17 @@ int ArithmeticSelfOpenCLKernel::Run() { uint32_t OW = output_shape[2]; uint32_t OC = UP_DIV(output_shape[3], C4NUM); - const std::vector &max_global = ocl_runtime->GetWorkItemSize(); + const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); std::vector local = {1, 1, 1}; // init local std::vector global = {OH, OW, OC}; ArithmeticSelfGetWorkGroup(global, &local, max_global[0]); int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index 582c8d6f9ce..7c1f7614f81 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -21,7 +21,6 @@ #include #include "ir/anf.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "nnacl/arithmetic_self_parameter.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index 6aefc623fb7..335768e6b8e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -18,7 +18,6 @@ #include #include #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/batchnorm.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/cl/batchnorm.cl.inc" @@ -40,8 +39,7 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector *img_siz im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto enable_fp16_ = ocl_runtime->GetFp16Enable(); + auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; } @@ -72,9 +70,8 @@ int BatchNormOpenCLKernel::Init() { std::set build_options; std::string source = batchnorm_source; std::string program_name = "Batch_normalization"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); return RET_OK; } @@ -98,7 +95,6 @@ void BatchNormGetWorkGroup(const std::vector &global, std::vectorname() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto input0_shape = in_tensors_[0]->shape(); auto output_shape = out_tensors_[0]->shape(); cl_int4 input_shape_ = {input0_shape[0], input0_shape[1], input0_shape[2], UP_DIV(input0_shape[3], C4NUM)}; @@ -107,20 +103,20 @@ int BatchNormOpenCLKernel::Run() { uint32_t OW = output_shape[2]; uint32_t OC = UP_DIV(output_shape[3], C4NUM); - const std::vector &max_global = ocl_runtime->GetWorkItemSize(); + const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); std::vector local = {1, 1, 1}; // init local std::vector global = {OH, OW, OC}; BatchNormGetWorkGroup(global, &local, max_global[0]); int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offest - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offest + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h index f76c8a2055d..a1de54f8ab7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h @@ -20,7 +20,6 @@ #include #include "ir/anf.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "nnacl/fp32/batchnorm.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc index ea7ffad1e55..746e5211c4c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc @@ -16,6 +16,7 @@ * limitations under the License. */ +#include "src/runtime/kernel/opencl/kernel/biasadd.h" #include #include #include @@ -23,7 +24,6 @@ #include "src/kernel_registry.h" #include "include/errorcode.h" -#include "src/runtime/kernel/opencl/kernel/biasadd.h" #include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/cl/biasadd.cl.inc" @@ -38,7 +38,7 @@ namespace mindspore::kernel { void BiasAddOpenCLKernel::InitBuffer() { int C = in_tensors_[1]->shape()[0]; int div_ci = UP_DIV(C, C4NUM); - auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); size_t img_dtype = CL_FLOAT; if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; @@ -57,8 +57,7 @@ int BiasAddOpenCLKernel::Init() { for (int i = 0; i < in_size_; ++i) { input_shape_.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i]; } - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); if (in_size_ != 4 && in_size_ != 2) { MS_LOG(ERROR) << "BiasAdd only support dim=4 or 2, but your dim=" << in_size_; @@ -75,8 +74,8 @@ int BiasAddOpenCLKernel::Init() { std::string source = biasadd_source; std::string program_name = "BiasAdd"; std::string kernel_name = "BiasAdd"; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); in_ori_format_ = in_tensors_[0]->GetFormat(); out_ori_format_ = out_tensors_[0]->GetFormat(); @@ -89,18 +88,17 @@ int BiasAddOpenCLKernel::Init() { int BiasAddOpenCLKernel::Run() { cl_int4 global_size = GetGlobalshape(); MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); int arg_idx = 0; std::map data_type{ {schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}}; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); std::vector local = {1, 1}; std::vector global = {static_cast(global_size.s[1]), static_cast(global_size.s[2])}; - auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); + auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h index e7619a82962..913cabe92da 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h @@ -23,7 +23,6 @@ #include "src/tensor.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "schema/model_generated.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index a58736a109b..221dc293e4f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include "src/runtime/kernel/opencl/kernel/concat.h" #include #include #include #include #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" -#include "src/runtime/kernel/opencl/kernel/concat.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/cl/concat.cl.inc" @@ -40,8 +40,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto enable_fp16_ = ocl_runtime->GetFp16Enable(); + auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; } @@ -52,8 +51,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) } int ConcatOpenCLKernel::RunAxis0() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto allocator_ = ocl_runtime->GetAllocator(); + auto allocator_ = ocl_runtime_->GetAllocator(); std::vector img_size; auto dst_data = out_tensors_[0]->data_c(); auto dst_origin = cl::array{0, 0, 0}; @@ -64,7 +62,7 @@ int ConcatOpenCLKernel::RunAxis0() { auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size[0], img_size[1], 1}; cl::Image2D *input_image = reinterpret_cast(allocator_->GetImage(src_data)); - ocl_runtime->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region); + ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region); dst_origin[1] += region[1]; } return RET_OK; @@ -112,9 +110,8 @@ int ConcatOpenCLKernel::Init() { std::set build_options; std::string source = concat_source; std::string program_name = "Concat"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); return RET_OK; } @@ -155,7 +152,6 @@ int ConcatOpenCLKernel::Run() { return RunAxis0(); } - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto input1_shape = in_tensors_[0]->shape(); auto input2_shape = in_tensors_[1]->shape(); auto output_shape = out_tensors_[0]->shape(); @@ -168,7 +164,7 @@ int ConcatOpenCLKernel::Run() { uint32_t OW = output_shape[2]; uint32_t OC = UP_DIV(output_shape[3], C4NUM); - const std::vector &max_global = ocl_runtime->GetWorkItemSize(); + const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); std::vector local = {1, 1, 1}; // init local std::vector global = {OH, OW, OC}; ConcatGetWorkGroup(global, &local, max_global[0]); @@ -176,48 +172,48 @@ int ConcatOpenCLKernel::Run() { int arg_cn = 0; if (in_tensors_.size() == 2) { - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); } else if (in_tensors_.size() == 3) { auto input3_shape = in_tensors_[2]->shape(); cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); } else if (in_tensors_.size() == 4) { auto input3_shape = in_tensors_[2]->shape(); auto input4_shape = in_tensors_[3]->shape(); cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)}; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape4_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape4_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); } else { MS_LOG(ERROR) << " input sizes must 2 or 3 or 4"; return RET_ERROR; } - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h index 5f08f21102f..394103f857c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h @@ -20,7 +20,6 @@ #include #include "ir/anf.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/arm/base/concat_base.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index 479ffd6cb2d..5dbc6ed0163 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -14,12 +14,11 @@ * limitations under the License. */ +#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h" #include #include #include "nnacl/fp32/common_func.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" -#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h" #ifndef PROGRAM_WITH_IL #include "src/runtime/kernel/opencl/cl/conv2d_transpose2x2.cl.inc" #endif @@ -41,16 +40,15 @@ int Conv2dTransposeOpenCLKernel::Init() { return RET_ERROR; } std::string kernel_name = "conv2d_transpose2x2_" + std::string(EnumNameFormat(op_format_)); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::string source = conv2d_transpose2x2_source; std::set build_options; std::string program_name = "conv2d_transpose2x2"; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif PadWeight(); in_ori_format_ = in_tensors_[0]->GetFormat(); @@ -71,7 +69,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { int kw = param->kernel_w_; int div_ci = UP_DIV(ci, C4NUM); int div_co = UP_DIV(co, C4NUM); - auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); auto data_size = enable_fp16_ ? sizeof(int16_t) : sizeof(float); // IHWO to OHWI4(I)4(O)(converter format is IHWO) @@ -188,7 +186,6 @@ int Conv2dTransposeOpenCLKernel::Run() { int ow = out_tensors_[0]->shape()[2]; int h = in_tensors_[0]->shape()[1]; int w = in_tensors_[0]->shape()[2]; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); // local size should less than MAX_GROUP_SIZE std::vector local = {16, 1, 16}; std::vector global = {UP_ROUND((size_t)UP_ROUND(oh / 2, 2), local[0]), @@ -200,16 +197,16 @@ int Conv2dTransposeOpenCLKernel::Run() { cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1}; cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1}; int arg_cnt = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h index 0ad8cbe72d9..3f39e015151 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h @@ -21,7 +21,6 @@ #include "src/lite_kernel.h" #include "nnacl/conv_parameter.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index ea0858d1b2d..5196799b8c5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -39,12 +39,11 @@ constexpr size_t CO_TILE = C4NUM; int ConvolutionOpenCLKernel::Init() { static int init_count = 0; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto allocator = ocl_runtime->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); auto param = reinterpret_cast(op_parameter_); std::set build_options; init_count++; - use_fp16_ = ocl_runtime->GetFp16Enable(); + use_fp16_ = ocl_runtime_->GetFp16Enable(); if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; @@ -76,21 +75,21 @@ int ConvolutionOpenCLKernel::Init() { MS_LOG(DEBUG) << "use winograd"; std::string program_name; program_name = "Winograd4x4To36" + std::to_string(init_count); - ocl_runtime->LoadSource(program_name, CodeGenWinograd4x4To36()); - ocl_runtime->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options); + ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36()); + ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options); program_name = "WinogradConvolution" + std::to_string(init_count); - ocl_runtime->LoadSource(program_name, CodeGenWinogradConvolution()); - ocl_runtime->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options); + ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution()); + ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options); program_name = "Winograd36To4x4" + std::to_string(init_count); - ocl_runtime->LoadSource(program_name, CodeGenWinograd36To4x4()); - ocl_runtime->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options); + ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4()); + ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options); } else { std::string program_name = "convolution" + std::to_string(init_count); std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_conv_, program_name, "Convolution", build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options); } // allocate winograd memory @@ -167,7 +166,7 @@ int ConvolutionOpenCLKernel::GenerateWinogradWeight() { } int ConvolutionOpenCLKernel::InitWeight() { - auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); // allocate memory size_t packed_weight_size; @@ -205,8 +204,7 @@ int ConvolutionOpenCLKernel::InitWeight() { } int ConvolutionOpenCLKernel::InitBias() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto allocator = ocl_runtime->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); // align bias from C to C4 auto bias_tensor = in_tensors_[2]; @@ -272,57 +270,56 @@ int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector *img_s int ConvolutionOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); int arg_cn = 0; if (use_winograd_) { arg_cn = 0; cl_int4 _4x4to36_in_shape = {1, IH_, IW_, CI_SLICES_}; cl_int4 _4x4to36_out_shape = {1, 36, TILES_XY_, CI_SLICES_}; - ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape); - ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape); arg_cn = 0; cl_int4 conv_in_shape = {1, 36, TILES_XY_, CI_SLICES_}; cl_int4 conv_out_shape = {1, 36, TILES_XY_, CO_SLICES_}; - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape); arg_cn = 0; cl_int4 _36to4x4_in_shape = {1, 16, TILES_XY_, CO_SLICES_}; cl_int4 _36to4x4_out_shape = {1, OH_, OW_, CO_SLICES_}; - ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape); - ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape); } else { arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); if (op_format_ == Format_NC4HW4) { cl_int4 input_shape = {1, IH_, IW_, CI_SLICES_}; cl_int4 output_shape = {1, OH_, OW_, CO_SLICES_}; - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, input_shape); - ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, output_shape); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, input_shape); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, output_shape); } } if (use_winograd_) { - ocl_runtime->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr); - ocl_runtime->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr); - ocl_runtime->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr); + ocl_runtime_->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr); + ocl_runtime_->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr); + ocl_runtime_->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr); } else { std::vector global, local; SetGlobalLocalConv(&global, &local); - ocl_runtime->RunKernel(kernel_conv_, global, local, nullptr); + ocl_runtime_->RunKernel(kernel_conv_, global, local, nullptr); } return RET_OK; @@ -819,10 +816,9 @@ std::string ConvolutionOpenCLKernel::CodeGenWinograd36To4x4() { } int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector *global, std::vector *local) { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); constexpr size_t work_group_size[] = {4, 4, 1}; - auto max_work_item_sizes = ocl_runtime->GetWorkItemSize(); - size_t max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime->Device())()); + auto max_work_item_sizes = ocl_runtime_->GetWorkItemSize(); + size_t max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime_->Device())()); const size_t max_z_size = std::min(16, max_work_item_sizes[2]); size_t global_h = UP_DIV(OH_, work_group_size[0]) * work_group_size[0]; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h index 2034257c665..99a7bc522fd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h @@ -22,7 +22,6 @@ #include "src/tensor.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "schema/model_generated.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "nnacl/conv_parameter.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index f5c04600219..19bea3f050d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -21,7 +21,6 @@ #include #include #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/utils.h" #include "nnacl/fp32/common_func.h" #include "nnacl/op_base.h" @@ -42,7 +41,6 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D; namespace mindspore::kernel { int DepthwiseConv2dOpenCLKernel::Init() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::string kernel_name = "DepthwiseConv2d"; auto in_format = op_format_; in_ori_format_ = in_tensors_[0]->GetFormat(); @@ -69,13 +67,13 @@ int DepthwiseConv2dOpenCLKernel::Init() { kernel_name += "_1x1"; } #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::string program_name = "DepthwiseConv2d"; std::set build_options; std::string source = depthwise_conv2d_source; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif this->InitBuffer(); MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast(out_mem_type_); @@ -84,9 +82,8 @@ int DepthwiseConv2dOpenCLKernel::Init() { int DepthwiseConv2dOpenCLKernel::InitBuffer() { auto parameter = reinterpret_cast(op_parameter_); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto allocator = ocl_runtime->GetAllocator(); - bool is_fp16 = ocl_runtime->GetFp16Enable(); + auto allocator = ocl_runtime_->GetAllocator(); + bool is_fp16 = ocl_runtime_->GetFp16Enable(); // weight: o, h, w, i; o == group, i == 1 void *origin_weight = in_tensors_.at(kWeightIndex)->data_c(); @@ -162,7 +159,7 @@ int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector *i im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; - if (lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) { + if (ocl_runtime_->GetFp16Enable()) { img_dtype = CL_HALF_FLOAT; } img_size->clear(); @@ -189,7 +186,6 @@ int DepthwiseConv2dOpenCLKernel::GetLocalSize(size_t idx, const std::vectorname() << " Running!"; auto parameter = reinterpret_cast(op_parameter_); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); std::vector global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; @@ -207,19 +203,19 @@ int DepthwiseConv2dOpenCLKernel::Run() { (cl_int)out_tensors_[0]->Batch()}; int arg_cnt = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dilation); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h index c48bf932857..dfeb6f7bdbc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h @@ -20,7 +20,6 @@ #include #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/conv_parameter.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index fa651796839..c7b94cdbae0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -19,7 +19,6 @@ #include #include #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/gather.h" #include "src/runtime/kernel/opencl/cl/gather.cl.inc" @@ -49,9 +48,8 @@ int GatherOpenCLKernel::Init() { std::set build_options; std::string source = gather_source; std::string program_name = "gather"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); // init indices_data_ auto indices_tensor = in_tensors_.at(1); int indices_num = indices_tensor->ElementsNum(); @@ -104,8 +102,7 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto enable_fp16_ = ocl_runtime->GetFp16Enable(); + auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; } @@ -117,7 +114,6 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) int GatherOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); if (InitBuffer() != RET_OK) { return RET_ERROR; @@ -134,14 +130,14 @@ int GatherOpenCLKernel::Run() { std::vector local = {1, 1, 1}; std::vector global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, src_size); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, dst_size); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_num); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, src_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dst_size); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_num); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index f6625be3f07..05883034ca0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -20,7 +20,6 @@ #include #include "ir/anf.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "nnacl/gather_parameter.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 41707f52f39..a3120f84de0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -19,7 +19,6 @@ #include #include "nnacl/fp32/common_func.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/matmul.h" #ifndef PROGRAM_WITH_IL #include "src/runtime/kernel/opencl/cl/matmul.cl.inc" @@ -35,7 +34,6 @@ namespace mindspore::kernel { int MatMulOpenCLKernel::Init() { std::string kernel_name = "MatMul"; kernel_name += "_" + std::string(EnumNameFormat(op_format_)); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto param = reinterpret_cast(op_parameter_); transposeA = param->a_transpose_; if (transposeA) { @@ -43,7 +41,7 @@ int MatMulOpenCLKernel::Init() { return RET_ERROR; } transposeB = param->b_transpose_; - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (in_tensors_[0]->shape().size() != out_tensors_[0]->shape().size() || (in_tensors_[0]->shape().size() != 2 && in_tensors_[0]->shape().size() != 4)) { MS_LOG(ERROR) << "matmul only support input shape size=2 or 4."; @@ -57,13 +55,13 @@ int MatMulOpenCLKernel::Init() { std::map dims2str = {{2, "_2d"}, {4, "_4d"}}; kernel_name += dims2str[dims]; #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = matmul_source; std::string program_name = "MatMul"; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif PadWeight(); @@ -79,7 +77,7 @@ int MatMulOpenCLKernel::ReSize() { return RET_OK; } void MatMulOpenCLKernel::PadWeight() { // ABMCI @ ABCICO = ABMCO - auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); int ci = inShape[3]; int ci4 = UP_DIV(ci, C4NUM); int co = outShape[3]; @@ -201,7 +199,6 @@ int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) int MatMulOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); // local size should less than MAX_GROUP_SIZE std::vector local = {32, 4, 1}; std::vector global = {UP_DIV(static_cast(outShape[3]), C4NUM), @@ -210,14 +207,14 @@ int MatMulOpenCLKernel::Run() { int arg_count = 0; cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; - ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_); - ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape); - ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape); - ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h index 0a540f005f4..141b16941ac 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h @@ -21,7 +21,6 @@ #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/matmul_parameter.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 80ebc6103b4..a41601fdb2f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -20,8 +20,6 @@ #include "include/errorcode.h" #include "src/kernel_registry.h" #include "src/runtime/kernel/opencl/utils.h" -#include "src/runtime/opencl/opencl_wrapper.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/image_format.h" #ifndef PROGRAM_WITH_IL #include "src/runtime/kernel/opencl/cl/avg_pool2d.cl.inc" @@ -59,10 +57,9 @@ int PoolingOpenCLKernel::Init() { MS_LOG(ERROR) << "Init `Pooling2d` kernel failed!"; return RET_INVALID_OP_NAME; } - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else kernel_name += "_" + std::string(EnumNameFormat(op_format_)); if (out_mem_type_ == OpenCLMemType::BUF) { @@ -72,8 +69,8 @@ int PoolingOpenCLKernel::Init() { kernel_name += "_IMG"; } std::set build_options; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif in_ori_format_ = in_tensors_[0]->GetFormat(); out_ori_format_ = out_tensors_[0]->GetFormat(); @@ -124,7 +121,6 @@ int PoolingOpenCLKernel::ReSize() { return RET_OK; } int PoolingOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; @@ -135,21 +131,21 @@ int PoolingOpenCLKernel::Run() { cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding); std::vector local_size; std::vector global_size = InitGlobalSize(); - int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())()); + int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())()); local_size = GetCommonLocalSize(global_size, max_work_group_size); global_size = GetCommonGlobalSize(local_size, global_size); - ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr); + ocl_runtime_->RunKernel(kernel_, global_size, local_size, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h index bc12fb0a3f9..3c472f17290 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h @@ -21,7 +21,6 @@ #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/fp32/pooling.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 9e7ddb5f589..52d271c164b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -24,7 +24,6 @@ #include "include/errorcode.h" #include "nnacl/fp32/common_func.h" #include "src/runtime/kernel/opencl/kernel/prelu.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/cl/prelu.cl.inc" using mindspore::kernel::KERNEL_ARCH::kGPU; @@ -36,7 +35,7 @@ using mindspore::schema::PrimitiveType_PReLU; namespace mindspore::kernel { void PReluOpenCLKernel::InitBuffer() { - auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + auto allocator = ocl_runtime_->GetAllocator(); int elem_num = in_tensors_[0]->shape().size() == 2 ? in_tensors_[0]->shape()[1] : in_tensors_[0]->shape()[3]; int elem_num_c4 = UP_DIV(elem_num, C4NUM); size_t img_dtype = CL_FLOAT; @@ -91,12 +90,11 @@ int PReluOpenCLKernel::Init() { std::string source = prelu_source; std::string program_name = "PRelu"; std::string kernel_name = "PRelu"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); InitBuffer(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); in_ori_format_ = in_tensors_[0]->GetFormat(); in_tensors_[0]->SetFormat(op_format_); out_ori_format_ = out_tensors_[0]->GetFormat(); @@ -107,18 +105,17 @@ int PReluOpenCLKernel::Init() { int PReluOpenCLKernel::Run() { MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::map data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, reinterpret_cast(in_tensors_[1]->shape()[0])); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, PReluWeight_); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, reinterpret_cast(in_tensors_[1]->shape()[0])); std::vector local = {1, 1}; std::vector global = {static_cast(global_shape_.s[1]), static_cast(global_shape_.s[2])}; - auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); + auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h index d1ab4a76bd7..8f0fcc65fb1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h @@ -22,7 +22,6 @@ #include "src/tensor.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "schema/model_generated.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index c0b2a6da0e5..a4b13f181a9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -19,7 +19,6 @@ #include #include "include/errorcode.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/reduce.h" #include "src/runtime/kernel/opencl/cl/reduce.cl.inc" @@ -59,8 +58,7 @@ int ReduceOpenCLKernel::Init() { } std::string kernel_name = reduce_type2str.at(reduce_param->mode_); kernel_name += "_" + std::string(EnumNameFormat(op_format_)); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { MS_LOG(ERROR) << "Reduce input channel " << in_tensors_[0]->shape().back() << " should equal output channel" @@ -68,12 +66,12 @@ int ReduceOpenCLKernel::Init() { return RET_ERROR; } #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = reduce_source; - ocl_runtime->LoadSource(kernel_name, source); - ocl_runtime->BuildKernel(kernel_, kernel_name, kernel_name, build_options); + ocl_runtime_->LoadSource(kernel_name, source); + ocl_runtime_->BuildKernel(kernel_, kernel_name, kernel_name, build_options); #endif in_ori_format_ = in_tensors_[0]->GetFormat(); out_ori_format_ = out_tensors_[0]->GetFormat(); @@ -130,15 +128,14 @@ int ReduceOpenCLKernel::Run() { int w = shapex[2]; int c = shapex[3]; int c4 = UP_DIV(c, C4NUM); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::vector local = {}; std::vector global = {static_cast(c4)}; cl_int4 size = {h, w, c4, 1}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h index a3897dd18fd..85fc42ea827 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h @@ -20,7 +20,6 @@ #include #include "src/lite_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/reduce_parameter.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 8d9e089ddd2..a20f7dca42f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -18,7 +18,6 @@ #include #include "include/errorcode.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/reshape.h" #include "src/runtime/kernel/opencl/cl/reshape.cl.inc" @@ -34,8 +33,7 @@ namespace mindspore::kernel { int ReshapeOpenCLKernel::Init() { std::string kernel_name = "reshape"; kernel_name += "_" + std::string(EnumNameFormat(op_format_)); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { MS_LOG(ERROR) << "Reshape output size should in 2,4"; return RET_ERROR; @@ -46,13 +44,13 @@ int ReshapeOpenCLKernel::Init() { return RET_ERROR; } #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = reshape_source; std::string program_name = "reshape"; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif in_ori_format_ = in_tensors_[0]->GetFormat(); out_ori_format_ = out_tensors_[0]->GetFormat(); @@ -112,17 +110,16 @@ int ReshapeOpenCLKernel::Run() { oh = out_tensors_[0]->shape()[1]; ow = out_tensors_[0]->shape()[2]; } - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::vector local = {}; std::vector global = {(size_t)oh, (size_t)ow, (size_t)c4}; cl_int4 size = {h, w, c4, 1}; cl_int4 size_out = {oh, ow, c4, 1}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size_out); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h index 8323339e5f7..7a570967781 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h @@ -20,7 +20,6 @@ #include #include "src/lite_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index 4495858928f..e22a3f9aeb6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -245,7 +245,6 @@ int ScaleOpenCLKernel::InitBuffer() { } int ScaleOpenCLKernel::Init() { - ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); std::string kernel_name; const ScaleParameter *scale_param = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index 92955ba1223..8359396dd63 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -19,7 +19,6 @@ #include #include "nnacl/scale.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::kernel { @@ -42,7 +41,6 @@ class ScaleOpenCLKernel : public OpenCLKernel { int InitBuffer(); cl::Kernel kernel_; - lite::opencl::OpenCLRuntime *ocl_runtime_; bool element_flag_{true}; void *scale_ptr_{nullptr}; void *offset_ptr_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc index c6b9c2cbe65..7c95688cc07 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc @@ -18,7 +18,6 @@ #include #include #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/slice.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/cl/slice.cl.inc" @@ -40,8 +39,7 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto enable_fp16_ = ocl_runtime->GetFp16Enable(); + auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; } @@ -71,9 +69,8 @@ int SliceOpenCLKernel::Init() { std::set build_options; std::string source = slice_source; std::string program_name = "slice"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); return RET_OK; } @@ -96,7 +93,6 @@ void SlcieGetWorkGroup(const std::vector &global, std::vector *l int SliceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto input_shape = in_tensors_[0]->shape(); cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)}; cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)}; @@ -105,18 +101,18 @@ int SliceOpenCLKernel::Run() { uint32_t OH = param->size_[1]; uint32_t OW = param->size_[2]; - const std::vector &max_global = ocl_runtime->GetWorkItemSize(); + const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); std::vector local = {1, 1, 1}; // init local std::vector global = {1, OH, OW}; SlcieGetWorkGroup(global, &local, max_global[0]); int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h index dadf03e0624..b081af628e1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h @@ -20,7 +20,6 @@ #include #include "ir/anf.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "nnacl/fp32/slice.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 5275f70651c..b92f018c377 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -19,7 +19,6 @@ #include #include "include/errorcode.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/utils.h" #ifndef PROGRAM_WITH_IL #include "src/runtime/kernel/opencl/cl/softmax.cl.inc" @@ -51,7 +50,7 @@ int SoftmaxOpenCLKernel::InitGlobalSize() { int SoftmaxOpenCLKernel::SetWorkGroupSize() { // set work group size InitGlobalSize(); - int max_work_group_size = runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*runtime_->Device())()); + int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())()); local_size_ = GetCommonLocalSize(global_size_, max_work_group_size); global_size_ = GetCommonGlobalSize(local_size_, global_size_); return lite::RET_OK; @@ -101,8 +100,7 @@ int SoftmaxOpenCLKernel::Init() { std::string program_name = "SoftMax"; std::string source = softmax_source; - runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = runtime_->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); // framework not set this param yet! just use default. if (in_tensors_[0]->shape().size() == 4) { // support 4d tensor @@ -133,8 +131,8 @@ int SoftmaxOpenCLKernel::Init() { program_name += "_IMG"; } std::set build_options; - runtime_->LoadSource(program_name, source); - runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif in_ori_format_ = in_tensors_[0]->GetFormat(); out_ori_format_ = out_tensors_[0]->GetFormat(); @@ -158,32 +156,32 @@ int SoftmaxOpenCLKernel::Run() { auto mask_ = GetMaskForLastChannel(channel_size); cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; - runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (is_image_out_) { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); } else { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } - runtime_->SetKernelArg(kernel_, arg_idx++, mask); - runtime_->SetKernelArg(kernel_, arg_idx++, slices); - runtime_->SetKernelArg(kernel_, arg_idx, slices_x32); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, slices); + ocl_runtime_->SetKernelArg(kernel_, arg_idx, slices_x32); SetWorkGroupSize1x1(); } else { int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; - runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (is_image_out_) { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); } else { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } - runtime_->SetKernelArg(kernel_, arg_idx, input_shape); + ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape); SetWorkGroupSize(); } // run opengl kernel - runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h index 2ea4075f455..d949aa80921 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h @@ -21,7 +21,6 @@ #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "nnacl/fp32/softmax.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { @@ -46,7 +45,6 @@ class SoftmaxOpenCLKernel : public OpenCLKernel { private: cl::Kernel kernel_; SoftmaxParameter *parameter_; - lite::opencl::OpenCLRuntime *runtime_; bool onexone_flag_{false}; std::vector local_size_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 0e372eec7e2..f5e274519ce 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -21,7 +21,6 @@ #include #include "include/errorcode.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/cl/to_format.cl.inc" using mindspore::kernel::KERNEL_ARCH::kGPU; @@ -33,7 +32,6 @@ using mindspore::schema::PrimitiveType_ToFormat; namespace mindspore::kernel { int ToFormatOpenCLKernel::Init() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto parameter = reinterpret_cast(op_parameter_); out_mem_type_ = parameter->out_mem_type; std::string program_name = "to_format"; @@ -53,12 +51,12 @@ int ToFormatOpenCLKernel::Init() { this->set_name(kernel_name); #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = to_format_source; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif InitNHWCShape(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; @@ -147,7 +145,7 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size return RET_ERROR; } img_size->clear(); - auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable(); + auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); size_t img_dtype = CL_FLOAT; if (enable_fp16_) { img_dtype = CL_HALF_FLOAT; @@ -158,7 +156,6 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size } int ToFormatOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::vector local = {}; std::vector global; GetGlobalSize(0, &global); @@ -167,11 +164,11 @@ int ToFormatOpenCLKernel::Run() { cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1}; auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF; - ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); - ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); - ocl_runtime->SetKernelArg(kernel_, 2, gsize); - ocl_runtime->SetKernelArg(kernel_, 3, shape); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); + ocl_runtime_->SetKernelArg(kernel_, 2, gsize); + ocl_runtime_->SetKernelArg(kernel_, 3, shape); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h index 03165f7b20e..f272ff1b355 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h @@ -20,7 +20,6 @@ #include #include "src/lite_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index c541e580a4c..c05fffbe355 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -18,7 +18,6 @@ #include #include "include/errorcode.h" #include "src/kernel_registry.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/kernel/transpose.h" #ifndef PROGRAM_WITH_IL #include "src/runtime/kernel/opencl/cl/transpose.cl.inc" @@ -34,8 +33,7 @@ namespace mindspore::kernel { int TransposeOpenCLKernel::Init() { std::string kernel_name = "transpose"; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - enable_fp16_ = ocl_runtime->GetFp16Enable(); + enable_fp16_ = ocl_runtime_->GetFp16Enable(); auto param = reinterpret_cast(op_parameter_); if (param->num_axes_ == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 && param->perm_[3] == 2) { @@ -52,13 +50,13 @@ int TransposeOpenCLKernel::Init() { kernel_name += "_IMG"; } #ifdef PROGRAM_WITH_IL - kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); + kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = transpose_source; std::string program_name = "transpose"; - ocl_runtime->LoadSource(program_name, source); - ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif if ((in_tensors_[0]->shape()[1] * in_tensors_[0]->shape()[2]) % 4 != 0) { MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; @@ -114,24 +112,23 @@ int TransposeOpenCLKernel::Run() { int c = shapex[3]; int c4 = UP_DIV(c, 4); int hw4 = UP_DIV(h * w, 4); - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::vector local = {16, 16}; std::vector global = {UP_ROUND(hw4, local[0]), UP_ROUND(c4, local[1])}; cl_int2 HW = {h * w, hw4}; cl_int2 C = {c, c4}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (out_mem_type_ == OpenCLMemType::BUF) { - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } else { - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); } - ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, C); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, w); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, h); - ocl_runtime->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, HW); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, C); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, w); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, h); + ocl_runtime_->RunKernel(kernel_, global, local, nullptr); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h index 245a08363f3..aee866153a5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h @@ -21,7 +21,6 @@ #include "src/lite_kernel.h" #include "nnacl/transpose.h" -#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index c265cee18a8..7e10112f7c6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -20,6 +20,7 @@ #include #include "src/lite_kernel.h" #include "include/errorcode.h" +#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::kernel { @@ -36,7 +37,16 @@ class OpenCLKernel : public LiteKernel { public: explicit OpenCLKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs) - : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {} + : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { + ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); + } + + ~OpenCLKernel() { + if (ocl_runtime_ != nullptr) { + lite::opencl::OpenCLRuntime::DeleteInstance(); + ocl_runtime_ = nullptr; + } + } virtual int Init() { return RET_ERROR; } virtual int Prepare() { return RET_ERROR; } @@ -59,6 +69,7 @@ class OpenCLKernel : public LiteKernel { schema::Format in_ori_format_{schema::Format::Format_NHWC}; schema::Format out_ori_format_{schema::Format::Format_NHWC4}; schema::Format op_format_{schema::Format::Format_NHWC4}; + lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index 844aa04bfe0..d14bdddce64 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -99,7 +99,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_te out_tensors->emplace_back(new_tensor); KernelKey desc{kGPU, kNumberTypeFloat32, schema::PrimitiveType_ToFormat}; - if (mem_type == OpenCLMemType::IMG && lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) { + if (mem_type == OpenCLMemType::IMG && ocl_runtime_->GetFp16Enable()) { desc.data_type = kNumberTypeFloat16; new_tensor->set_data_type(kNumberTypeFloat16); } @@ -160,7 +160,8 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_te } int SubGraphOpenCLKernel::Init() { - allocator_ = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); + allocator_ = ocl_runtime_->GetAllocator(); MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); for (const auto tensor : in_tensors_) { tensor->set_allocator(allocator_); @@ -195,8 +196,7 @@ int SubGraphOpenCLKernel::Init() { } int SubGraphOpenCLKernel::UpdateTensorDataType() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - bool is_fp16 = ocl_runtime->GetFp16Enable(); + bool is_fp16 = ocl_runtime_->GetFp16Enable(); if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) { std::set out_set; out_set.insert(in_tensors_.begin(), in_tensors_.end()); @@ -292,16 +292,25 @@ int SubGraphOpenCLKernel::UnInit() { delete tensor; } } + in_convert_tensors_.clear(); for (const auto &tensor : out_convert_tensors_) { if (tensor != nullptr) { delete tensor; } } - for (const auto &op : in_convert_ops_) { + out_convert_tensors_.clear(); + for (const auto &op : nodes_) { if (op != nullptr) { delete op; } } + nodes_.clear(); + in_convert_ops_.clear(); + out_convert_ops_.clear(); + if (ocl_runtime_ != nullptr) { + lite::opencl::OpenCLRuntime::DeleteInstance(); + ocl_runtime_ = nullptr; + } return RET_OK; } @@ -310,14 +319,13 @@ int SubGraphOpenCLKernel::InferShape() { return RET_OK; } int SubGraphOpenCLKernel::ReSize() { return RET_OK; } int SubGraphOpenCLKernel::Run() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); for (auto &tensor : in_tensors_) { allocator_->UnmapBuffer(tensor->data_c()); } lite::opencl::OpenCLExecutor executor; executor.Run(in_tensors_, out_tensors_, nodes_, allocator_); - ocl_runtime->SyncCommandQueue(); + ocl_runtime_->SyncCommandQueue(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h index f4aae11a512..e6a083c6928 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h @@ -64,6 +64,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel { std::vector out_parameters_; std::vector in_convert_ops_; std::vector out_convert_ops_; + lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index 896d460a8ba..1a1351a853d 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -23,8 +23,6 @@ namespace mindspore::lite::opencl { -OpenCLAllocator::OpenCLAllocator() {} - OpenCLAllocator::OpenCLAllocator(OpenCLRuntime *ocl_runtime) : ocl_runtime_(ocl_runtime) {} OpenCLAllocator::~OpenCLAllocator() { Clear(); } @@ -49,9 +47,6 @@ void OpenCLAllocator::UnLock() { void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector{}); } void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size) { - if (ocl_runtime_ == nullptr) { - ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); - } auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); size_t img_pitch = 0; @@ -144,9 +139,6 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; return nullptr; } - if (ocl_runtime_ == nullptr) { - ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); - } Lock(); auto iter = free_list_.lower_bound(size); while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { @@ -258,9 +250,6 @@ void *OpenCLAllocator::GetBuffer(void *buffer) { void OpenCLAllocator::Clear() { Lock(); - if (ocl_runtime_ == nullptr) { - ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); - } auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { if (svm_capabilities) { @@ -306,9 +295,6 @@ void OpenCLAllocator::Clear() { } void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) { - if (ocl_runtime_ == nullptr) { - ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); - } auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); if (svm_capabilities) { if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { @@ -362,9 +348,6 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, } int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { - if (ocl_runtime_ == nullptr) { - ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); - } auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); if (svm_capabilities) { if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h index 6649c29e5df..6beaf924318 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h @@ -45,7 +45,6 @@ enum class MemType : char { SVM, BUF, IMG }; class OpenCLAllocator : public Allocator { public: - OpenCLAllocator(); explicit OpenCLAllocator(OpenCLRuntime *ocl_runtime); ~OpenCLAllocator() override; void SetContext(const AllocatorContext &ctx) override; diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.h b/mindspore/lite/src/runtime/opencl/opencl_executor.h index 03061a82ca9..b12a793238b 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.h +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.h @@ -27,7 +27,11 @@ namespace mindspore::lite::opencl { class OpenCLExecutor : Executor { public: - OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); } + OpenCLExecutor() : Executor() { + auto ocl_runtime = OpenCLRuntime::GetInstance(); + allocator_ = ocl_runtime->GetAllocator(); + OpenCLRuntime::DeleteInstance(); + } int Prepare(const std::vector &kernels); diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index 14de6fe173d..49b712ab8d0 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -244,7 +244,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector &in_tens TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast(primitive->Type())}; #if SUPPORT_GPU - if (context_->device_type_ == DT_GPU && lite::opencl::OpenCLRuntime::GetInstance()->IsInitOK()) { + if (context_->device_type_ == DT_GPU) { desc.arch = kernel::KERNEL_ARCH::kGPU; auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc); if (kernel != nullptr) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc index 9152b21219d..f45f0f8f2d1 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc @@ -157,7 +157,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { ret = sub_graph->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init sub_graph error."; - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -167,7 +166,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; ret = sub_graph->Run(); if (ret != RET_OK) { - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -182,7 +180,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { printf_tensor("ReluFp32--output data--", outputs[0]); CompareRes(output_tensor, out_file); } - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -271,7 +268,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { ret = sub_graph->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init sub_graph error."; - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -281,7 +277,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; ret = sub_graph->Run(); if (ret != RET_OK) { - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -297,7 +292,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { printf_tensor("Relu6:FP32--output data---", outputs[0]); CompareRes(output_tensor, out_file); } - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -386,7 +380,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { ret = sub_graph->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init sub_graph error."; - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -396,7 +389,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; ret = sub_graph->Run(); if (ret != RET_OK) { - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -412,7 +404,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { printf_tensor("Sigmoid:FP32--output data---", outputs[0]); CompareRes(output_tensor, out_file); } - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -502,7 +493,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { ret = sub_graph->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init sub_graph error."; - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -512,7 +502,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; ret = sub_graph->Run(); if (ret != RET_OK) { - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -527,7 +516,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { printf_tensor("Leaky Relu:FP32--output data---", outputs[0]); CompareRes(output_tensor, out_file); } - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -616,7 +604,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { ret = sub_graph->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init sub_graph error."; - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -626,7 +613,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; ret = sub_graph->Run(); if (ret != RET_OK) { - delete kernel; delete param; delete input_tensor; delete output_tensor; @@ -642,7 +628,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { printf_tensor("Tanh:FP32--output data---", outputs[0]); CompareRes(output_tensor, out_file); } - delete kernel; delete param; delete input_tensor; delete output_tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc index 25cbbb29e7e..6fee1ca04fb 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc @@ -127,7 +127,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { delete tensor; } delete param; - delete arithmeticself_kernel; delete sub_graph; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc index ecf8c9a9ab2..63bb2022b24 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc @@ -203,7 +203,6 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh delete[] data_c_ocl; delete kernel; - delete arith_kernel; delete param; for (auto tensor : inputs) { delete tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc index df75b7bc3d4..87242397d28 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc @@ -147,7 +147,6 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { delete tensor; } delete param; - delete batchnorm_kernel; delete sub_graph; } TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc index 339f300a3b1..72af342b1c6 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc @@ -174,7 +174,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { delete weight_tensor; delete sub_graph; delete param; - delete biasadd_kernel; return; } MS_LOG(INFO) << "Sub graph begin running!"; @@ -186,7 +185,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { delete weight_tensor; delete sub_graph; delete param; - delete biasadd_kernel; return; } @@ -202,7 +200,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { delete output_tensor; delete sub_graph; delete param; - delete biasadd_kernel; lite::opencl::OpenCLRuntime::DeleteInstance(); } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc index 2342b0668f3..45fd1d3b3db 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc @@ -164,7 +164,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { delete tensor; } delete param; - delete concat_kernel; delete sub_graph; } @@ -284,7 +283,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { delete tensor; } delete param; - delete concat_kernel; delete sub_graph; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc index 0d1f5489302..79b2e9d22eb 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc @@ -78,7 +78,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector(outputs[0]->data_c()); CommonTest::CompareOutputData(output_data, static_cast(correct_data), outputs[0]->ElementsNum(), 0.0001); - delete pkernel; delete sub_graph; } TEST_F(TestGatherOpenCL, Axis1Fp32) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc index 2de2b17a49a..22d6671f030 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc @@ -167,7 +167,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { delete output_tensor; delete weight_tensor; delete param; - delete prelu_kernel; delete sub_graph; return; } @@ -179,7 +178,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { delete output_tensor; delete weight_tensor; delete param; - delete prelu_kernel; delete sub_graph; return; } @@ -195,7 +193,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { delete output_tensor; delete weight_tensor; delete param; - delete prelu_kernel; delete sub_graph; lite::opencl::OpenCLRuntime::DeleteInstance(); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc index 3c25d3840aa..82249183977 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc @@ -223,7 +223,6 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh delete[] data_out_ocl; delete kernel; - delete scale_kernel; delete param; for (auto tensor : inputs) { delete tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index fcb7cc86c70..ce895f4c0cf 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -143,7 +143,6 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { for (auto tensor : outputs) { delete tensor; } - delete slice_kernel; delete sub_graph; } TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { @@ -251,7 +250,6 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { for (auto tensor : outputs) { delete tensor; } - delete slice_kernel; delete sub_graph; } } // namespace mindspore