From 564bf717d5da5d8b94997a3c204bb451af335709 Mon Sep 17 00:00:00 2001 From: Corleone Date: Sat, 22 Aug 2020 16:42:51 +0800 Subject: [PATCH] add create program from binary interface for opencl --- .../kernel/opencl/kernel/arithmetic.cc | 5 +- .../kernel/opencl/kernel/conv2d_transpose.cc | 2 +- .../kernel/opencl/kernel/depthwise_conv2d.cc | 2 +- .../runtime/kernel/opencl/kernel/matmul.cc | 2 +- .../runtime/kernel/opencl/kernel/pooling2d.cc | 2 +- .../runtime/kernel/opencl/kernel/reshape.cc | 2 +- .../runtime/kernel/opencl/kernel/softmax.cc | 2 +- .../runtime/kernel/opencl/kernel/to_format.cc | 2 +- .../runtime/kernel/opencl/kernel/transpose.cc | 2 +- .../src/runtime/opencl/opencl_allocator.cc | 12 +- .../src/runtime/opencl/opencl_allocator.h | 5 +- .../lite/src/runtime/opencl/opencl_runtime.cc | 158 +++++++----------- .../lite/src/runtime/opencl/opencl_runtime.h | 74 ++++---- .../lite/src/runtime/opencl/opencl_wrapper.cc | 9 + 14 files changed, 132 insertions(+), 147 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 7fb0a15a58a..8fb4a2d52b1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -103,10 +103,7 @@ int ArithmeticOpenCLKernel::Init() { lite::STATUS error_code = RET_OK; #ifdef PROGRAM_WITH_IL - bool ret = runtime_->CreateKernelFromIL(kernel_(), kernel_name); - if (!ret) { - error_code = RET_ERROR; - } + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else if (out_mem_type_ == OpenCLMemType::IMG) { kernel_name += "_IMG"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index f14a00103c8..dcf776efe65 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -42,7 +42,7 @@ int Conv2dTransposeOpenCLKernel::Init() { std::string kernel_name = "conv2d_transpose2x2"; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else std::string source = conv2d_transpose2x2_source; std::set build_options; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index fbd146992c7..b6c2e05b013 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -64,7 +64,7 @@ int DepthwiseConv2dOpenCLKernel::Init() { kernel_name += "_1x1"; } #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else std::string program_name = "DepthwiseConv2d"; std::set build_options; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 9593cc419d1..4dbfb318447 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -36,7 +36,7 @@ int MatMulOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = matmul_source; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 6e181c5ec28..65cac2c069c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -62,7 +62,7 @@ int PoolingOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else if (out_mem_type_ == OpenCLMemType::BUF) { kernel_name += "_BUF"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 6fc2f4ce313..db644507ca2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -45,7 +45,7 @@ int ReshapeOpenCLKernel::Init() { return RET_ERROR; } #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = reshape_source; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index caa6a57f956..9422c08e600 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -107,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() { MS_LOG(EXCEPTION) << "Init `Softmax` kernel failed: Unsupported axis: " << parameter_->axis_; } #ifdef PROGRAM_WITH_IL - runtime_->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else if (!is_image_out_) { out_mem_type_ = OpenCLMemType::BUF; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 92c2c03479a..9e8361d2edc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -50,7 +50,7 @@ int ToFormatOpenCLKernel::Init() { this->set_name(kernel_name); #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = to_format_source; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 9c2df7bc7b2..e6b1d888c85 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -41,7 +41,7 @@ int TransposeOpenCLKernel::Init() { kernel_name += "_IMG"; } #ifdef PROGRAM_WITH_IL - ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); + kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else std::set build_options; std::string source = transpose_source; diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index e39dcdf2778..fc9e1caf151 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -81,7 +81,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size) void *device_ptr = nullptr; void *image_ptr = nullptr; - if (svm_capabilities && svm_on_) { + if (svm_capabilities) { cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0; flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0; flags = flags | CL_MEM_READ_WRITE; @@ -280,7 +280,7 @@ void OpenCLAllocator::Clear() { void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) { auto ocl_runtime = opencl::OpenCLRuntime::GetInstance(); auto svm_capabilities = ocl_runtime->GetSVMCapabilities(); - if (svm_capabilities && svm_on_) { + if (svm_capabilities) { if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { @@ -356,8 +356,8 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { } } -MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) { - MEM_TYPE mem_type{MEM_TYPE::BUF}; +MemType OpenCLAllocator::GetMemType(void *host_ptr) { + MemType mem_type{MemType::BUF}; Lock(); auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { @@ -367,9 +367,9 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) { } MemBuf *mem_buf = it->second; if (mem_buf->img_size.empty()) { - mem_type = MEM_TYPE::BUF; + mem_type = MemType::BUF; } else { - mem_type = MEM_TYPE::IMG; + mem_type = MemType::IMG; } UnLock(); return mem_type; diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h index 87bf03cc5cd..b582e83bc6e 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h @@ -40,7 +40,7 @@ struct OpenclMemory { OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER}; }; -enum class MEM_TYPE : char { BUF, IMG }; +enum class MemType : char { SVM, BUF, IMG }; class OpenCLAllocator : public Allocator { public: @@ -58,7 +58,7 @@ class OpenCLAllocator : public Allocator { void *GetBuffer(void *host_ptr); void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true); int UnmapBuffer(void *host_ptr, void *command_queue = nullptr); - MEM_TYPE GetMemType(void *host_ptr); + MemType GetMemType(void *host_ptr); int GetImageSize(void *host_ptr, std::vector *img_size); void *Prepare(void *ptr) override { if (ptr != nullptr) { @@ -86,7 +86,6 @@ class OpenCLAllocator : public Allocator { // 6 is empirical value int shift_factor_ = 6; bool lock_flag_ = false; - bool svm_on_{false}; }; } // namespace mindspore::lite::opencl diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc index af6ed559043..8a226bc4982 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc @@ -138,27 +138,27 @@ int OpenCLRuntime::Init() { << max_work_item_sizes_[2]; gpu_info_ = ParseGpuInfo(device_name, device_version); - cl_int err; + cl_int ret; #if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120) // create context from glcontext MS_LOG(INFO) << "Create special opencl context to share with OpenGL"; cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(), CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0}; - context_ = std::make_shared(std::vector{*device_}, context_prop, nullptr, nullptr, &err); + context_ = std::make_shared(std::vector{*device_}, context_prop, nullptr, nullptr, &ret); - if (err != CL_SUCCESS) { + if (ret != CL_SUCCESS) { MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then."; - context_ = std::make_shared(std::vector{*device_}, nullptr, nullptr, nullptr, &err); + context_ = std::make_shared(std::vector{*device_}, nullptr, nullptr, nullptr, &ret); } #else MS_LOG(INFO) << "Create common opencl context"; // cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](), // CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0}; // context_ = std::make_shared(std::vector{*device_}, context_prop, nullptr, nullptr, &err); - context_ = std::make_shared(std::vector{*device_}, nullptr, nullptr, nullptr, &err); + context_ = std::make_shared(std::vector{*device_}, nullptr, nullptr, nullptr, &ret); #endif - if (err != CL_SUCCESS) { - MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err); + if (ret != CL_SUCCESS) { + MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -170,9 +170,8 @@ int OpenCLRuntime::Init() { auto success = device_->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fp_config); support_fp16_ = CL_SUCCESS == success && fp_config > 0; - err = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_); - svm_capabilities_ = 0; - if (err != CL_SUCCESS || svm_capabilities_ == 0) { + ret = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_); + if (ret != CL_SUCCESS || svm_capabilities_ == 0) { svm_capabilities_ = 0; MS_LOG(INFO) << "SVM capalibilties: " << "NONE"; @@ -204,16 +203,16 @@ int OpenCLRuntime::Init() { properties |= CL_QUEUE_PROFILING_ENABLE; #endif - default_command_queue_ = std::make_shared(*context_, *device_, properties, &err); - if (err != CL_SUCCESS) { - MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err); + default_command_queue_ = std::make_shared(*context_, *device_, properties, &ret); + if (ret != CL_SUCCESS) { + MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } allocator_ = std::make_shared(); #ifdef PROGRAM_WITH_IL std::string flag = ""; - CreateProgramFromIL(g_program_binary, flag); + binary_program_ = CreateProgramFromIL(g_program_binary, flag); #endif init_done_ = true; MS_LOG(INFO) << "OpenCLRuntime init done!"; @@ -317,7 +316,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na MS_LOG(ERROR) << "load program (" << program_name << ") failed!"; return RET_ERROR; } - status = this->BuildProgram(build_options_str, &program); + status = this->BuildProgram(build_options_str, program); if (!status) { MS_LOG(ERROR) << program_name << " build failed!"; return RET_ERROR; @@ -325,53 +324,15 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na program_map_.emplace(build_program_key, program); } - cl_int err; - kernel = cl::Kernel(program, kernel_name.c_str(), &err); - if (err != CL_SUCCESS) { - MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err); + cl_int ret; + kernel = cl::Kernel(program, kernel_name.c_str(), &ret); + if (ret != CL_SUCCESS) { + MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(ret); return RET_ERROR; } return RET_OK; } -// Run Kernel with 1D, 2D, 3D group size, and local size can be empty. -int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector &global, - const std::vector &local, cl::CommandQueue *command_queue) { - if (command_queue == nullptr) { - command_queue = default_command_queue_.get(); - } - MS_ASSERT(local.size() == 0 || local.size() == global.size()); - std::vector internal_global_ws = global; - for (size_t i = 0; i < local.size(); ++i) { - internal_global_ws[i] = ROUND_UP(global[i], local[i]); - } - - MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size(); - for (size_t i = 0; i < global.size(); i++) { - MS_LOG(DEBUG) << "global[" << i << "] = " << global[i]; - } - for (size_t i = 0; i < local.size(); i++) { - MS_LOG(DEBUG) << "local[" << i << "] = " << local[i]; - } - - cl::Event event; - cl_int error = CL_SUCCESS; - if (local.size() == 0) { - error = - clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), nullptr, 0, nullptr, nullptr); - } else { - error = clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), local.data(), 0, - nullptr, nullptr); - } - - if (error != CL_SUCCESS) { - MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error); - return RET_ERROR; - } - MS_LOG(DEBUG) << "RunKernel success!"; - return RET_OK; -} - // Run Kernel with 1D, 2D, 3D group size, and local size can be empty. int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector &global, const std::vector &local, cl::CommandQueue *command_queue) { @@ -392,9 +353,6 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector MS_LOG(DEBUG) << "local[" << i << "] = " << local[i]; } - cl::Event event; - cl_int err = CL_SUCCESS; - cl::NDRange global_range = cl::NullRange; cl::NDRange local_range = cl::NullRange; if (global.size() == 1) { @@ -417,10 +375,12 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector return RET_ERROR; } - err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event); + cl::Event event; + cl_int ret = CL_SUCCESS; + ret = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event); - if (err != CL_SUCCESS) { - MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err); + if (ret != CL_SUCCESS) { + MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(ret); return RET_ERROR; } MS_LOG(DEBUG) << "RunKernel success!"; @@ -486,11 +446,11 @@ bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *pr } // build program with build options -bool OpenCLRuntime::BuildProgram(const std::string &build_options, cl::Program *program) { - cl_int ret = program->build({*device_}, build_options.c_str()); +bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Program &program) { + cl_int ret = program.build({*device_}, build_options.c_str()); if (ret != CL_SUCCESS) { - if (program->getBuildInfo(*device_) == CL_BUILD_ERROR) { - std::string build_log = program->getBuildInfo(*device_); + if (program.getBuildInfo(*device_) == CL_BUILD_ERROR) { + std::string build_log = program.getBuildInfo(*device_); MS_LOG(ERROR) << "Program build log: " << build_log; } MS_LOG(ERROR) << "Build program failed: " << CLErrorCode(ret); @@ -525,7 +485,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t return cl_ret == CL_SUCCESS; } -void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, cl::CommandQueue *command_queue, +void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { if (command_queue == nullptr) { command_queue = default_command_queue_.get(); @@ -534,7 +494,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, } int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { - if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { + if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { return RET_OK; } if (command_queue == nullptr) { @@ -543,7 +503,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command return command_queue->enqueueMapSVM(host_ptr, sync, flags, size); } -void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector ®ion, +void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector ®ion, cl::CommandQueue *command_queue) const { if (command_queue == nullptr) { command_queue = default_command_queue_.get(); @@ -555,7 +515,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, c return command_queue->enqueueMapImage(buffer, sync, flags, origin_, region_, &row_pitch, &slice_pitch); } -int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue) const { +int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const { if (command_queue == nullptr) { command_queue = default_command_queue_.get(); } @@ -563,7 +523,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm } int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const { - if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { + if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { return RET_OK; } if (command_queue == nullptr) { @@ -586,43 +546,55 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) { int OpenCLRuntime::GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id) { size_t max_work_group_size; - cl_int err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), + cl_int ret = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, nullptr); - if (err != CL_SUCCESS) { - MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(err); + if (ret != CL_SUCCESS) { + MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(ret); } return static_cast(max_work_group_size); } -bool OpenCLRuntime::CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name) { +cl::Kernel OpenCLRuntime::GetKernelFromBinary(const std::string &kernel_name) { cl_int ret = CL_SUCCESS; - kernel = clCreateKernel(il_program_, kernel_name.c_str(), &ret); + cl::Kernel kernel = cl::Kernel(binary_program_, kernel_name.c_str(), &ret); if (ret != CL_SUCCESS) { - MS_LOG(ERROR) << "Create kernel with IL failed: " << CLErrorCode(ret); + MS_LOG(ERROR) << "Create kernel with binary program failed: " << CLErrorCode(ret); } - return ret == CL_SUCCESS; + return kernel; } // build program with IL -bool OpenCLRuntime::CreateProgramFromIL(const std::vector program_binary, const std::string flag) { +cl::Program OpenCLRuntime::CreateProgramFromIL(const std::vector &binary, const std::string &flag) { #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - size_t program_length = program_binary.size(); - cl_int ret = CL_SUCCESS; - il_program_ = clCreateProgramWithIL((*context_)(), program_binary.data(), program_length, &ret); - if (ret != CL_SUCCESS) { - MS_LOG(ERROR) << "Create program with IL failed: " << CLErrorCode(ret); - return false; + cl::Program program = cl::Program(*context_, binary); + bool status = BuildProgram(default_build_opts_, program); + if (!status) { + MS_LOG(ERROR) << "Build program with IL failed!"; } - - ret = clBuildProgram(il_program_, 1, &(*device_)(), flag.c_str(), NULL, NULL); - if (ret != CL_SUCCESS) { - MS_LOG(ERROR) << "Build program with IL failed: " << CLErrorCode(ret); - } - return ret == CL_SUCCESS; + return program; #else MS_LOG(ERROR) << "Create program with IL failed! The compute capabitity of device should be 2.1 and higher."; - return false; + return cl::Program(); #endif } +// build program with binary +cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector> &binary, + const std::string &flag) { + cl::Program program = cl::Program(*context_, {*device_}, binary); + bool status = BuildProgram(default_build_opts_, program); + if (!status) { + MS_LOG(ERROR) << "Build program with binary failed!"; + } + return program; +} + +std::vector> OpenCLRuntime::GetProgramBinaries(const cl::Program &program) { + cl_int ret = CL_SUCCESS; + auto binary = program.getInfo(&ret); + if (ret != CL_SUCCESS) { + MS_LOG(ERROR) << "Get program binary failed: " << CLErrorCode(ret); + } + return binary; +} } // namespace mindspore::lite::opencl diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/opencl/opencl_runtime.h index bcf066e15cc..a4b18690279 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.h @@ -65,61 +65,68 @@ class OpenCLRuntime { GpuInfo GetGpuInfo(); bool GetFp16Enable() const; bool SetFp16Enable(bool enable); - const std::vector &GetWorkItemSize() { return max_work_item_sizes_; } - uint32_t GetImagePitchAlignment() { return image_pitch_align_; } - cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; } + bool GetSVMEnable() const { return svm_enable_; } + void SetSVMEnable(bool enable) { svm_enable_ = enable; } + const std::vector &GetWorkItemSize() const { return max_work_item_sizes_; } + uint32_t GetImagePitchAlignment() const { return image_pitch_align_; } + cl_device_svm_capabilities GetSVMCapabilities() const { return svm_enable_ ? svm_capabilities_ : 0; } template - typename std::enable_if::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index, - const T value) { - if (svm_capabilities_) { - MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value; - return clSetKernelArgSVMPointer(kernel, index, value); - } else { - MEM_TYPE mem_type = allocator_->GetMemType(value); - if (mem_type == MEM_TYPE::BUF) { + typename std::enable_if::value, cl_int>::type SetKernelArg(cl::Kernel &kernel, uint32_t index, + const T value, + const MemType mem_type = MemType::IMG) { + switch (mem_type) { + case MemType::SVM: { + MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value; + return kernel.setArg(index, value); + } + case MemType::BUF: { cl::Buffer *buffer = reinterpret_cast(allocator_->GetBuffer(value)); MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value; - return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)()); - } else { - cl::Image2D *image = reinterpret_cast(allocator_->GetImage(value)); - MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value; - return clSetKernelArg(kernel, index, sizeof((*image)()), &(*image)()); + return kernel.setArg(index, *buffer); } + case MemType::IMG: { + cl::Image2D *image = reinterpret_cast(allocator_->GetImage(value)); + if (image == nullptr) { + MS_LOG(WARNING) << "Can't get Image2D, try to use Buffer. Please confirm the buffer type."; + cl::Buffer *buffer = reinterpret_cast(allocator_->GetBuffer(value)); + MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value; + return kernel.setArg(index, *buffer); + } + MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value; + return kernel.setArg(index, *image); + } + default: + MS_LOG(ERROR) << "Unsupport opencl memory type: " << static_cast(mem_type); } } template - typename std::enable_if::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index, - const T value) { - return clSetKernelArg(kernel, index, sizeof(value), &value); + typename std::enable_if::value, cl_int>::type SetKernelArg( + cl::Kernel &kernel, uint32_t index, const T value, const MemType mem_type = MemType::IMG) { + return kernel.setArg(index, value); } - template - int SetKernelArg(cl::Kernel &kernel, uint32_t index, const T &value) { - return SetKernelArg(kernel(), index, value); - } - - bool CreateProgramFromIL(const std::vector program_binary, const std::string flag); - bool CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name); + cl::Program CreateProgramFromIL(const std::vector &binary, const std::string &flag); + cl::Program CreateProgramFromBinary(const std::vector> &binary, const std::string &flag); + cl::Kernel GetKernelFromBinary(const std::string &kernel_name); + std::vector> GetProgramBinaries(const cl::Program &program); bool LoadSource(const std::string &program_name, const std::string &source); int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, const std::set &build_options); - int RunKernel(const cl_kernel &kernel, const std::vector &global, const std::vector &local, - cl::CommandQueue *command_queue); int RunKernel(const cl::Kernel &kernel, const std::vector &global, const std::vector &local, cl::CommandQueue *command_queue); bool CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr, bool sync = false) const; bool CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr, bool sync = false) const; - void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr, + void *MapBuffer(const cl::Buffer &buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr, bool sync = false) const; - void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector ®ion, + void *MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector ®ion, cl::CommandQueue *command_queue = nullptr) const; int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr, bool sync = false) const; - int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; + int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr); @@ -136,7 +143,7 @@ class OpenCLRuntime { GpuInfo ParseGpuInfo(std::string device_name, std::string device_version); bool LoadProgram(const std::string &program_name, cl::Program *program); - bool BuildProgram(const std::string &build_options, cl::Program *program); + bool BuildProgram(const std::string &build_options, const cl::Program &program); private: static std::shared_ptr opencl_runtime_singleton_; @@ -146,7 +153,7 @@ class OpenCLRuntime { std::shared_ptr device_{nullptr}; std::shared_ptr allocator_{nullptr}; std::map program_map_{}; - cl_program il_program_{0}; + cl::Program binary_program_{0}; uint64_t global_memery_cachesize_{0}; int max_work_group_size; uint32_t compute_units_{0}; @@ -155,6 +162,7 @@ class OpenCLRuntime { GpuInfo gpu_info_; bool support_fp16_{false}; bool fp16_enable_{false}; + bool svm_enable_{false}; cl_device_svm_capabilities svm_capabilities_{0}; cl_uint image_pitch_align_{0}; std::vector max_work_item_sizes_; diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc index b97f0259eda..f6f0b8bd967 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc @@ -273,6 +273,15 @@ cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const ch return func(context, count, strings, lengths, errcode_ret); } +// clCreateProgramWithBinary wrapper, use OpenCLWrapper function. +cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *devices_list, + const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, + cl_int *errcode_ret) { + auto func = mindspore::lite::opencl::OpenCLWrapper::GetInstance()->clCreateProgramWithBinary; + MS_ASSERT(func != nullptr); + return func(context, num_devices, devices_list, lengths, binaries, binary_status, errcode_ret); +} + // clGetProgramInfo wrapper, use OpenCLWrapper function. cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) {