!4994 add create program from binary interface for opencl
Merge pull request !4994 from liuchao/master
This commit is contained in:
commit
3454a808c7
|
@ -103,10 +103,7 @@ int ArithmeticOpenCLKernel::Init() {
|
|||
|
||||
lite::STATUS error_code = RET_OK;
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
bool ret = runtime_->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
if (!ret) {
|
||||
error_code = RET_ERROR;
|
||||
}
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
if (out_mem_type_ == OpenCLMemType::IMG) {
|
||||
kernel_name += "_IMG";
|
||||
|
|
|
@ -42,7 +42,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
|
|||
std::string kernel_name = "conv2d_transpose2x2";
|
||||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
std::string source = conv2d_transpose2x2_source;
|
||||
std::set<std::string> build_options;
|
||||
|
|
|
@ -64,7 +64,7 @@ int DepthwiseConv2dOpenCLKernel::Init() {
|
|||
kernel_name += "_1x1";
|
||||
}
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
std::string program_name = "DepthwiseConv2d";
|
||||
std::set<std::string> build_options;
|
||||
|
|
|
@ -36,7 +36,7 @@ int MatMulOpenCLKernel::Init() {
|
|||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
std::set<std::string> build_options;
|
||||
std::string source = matmul_source;
|
||||
|
|
|
@ -62,7 +62,7 @@ int PoolingOpenCLKernel::Init() {
|
|||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
if (out_mem_type_ == OpenCLMemType::BUF) {
|
||||
kernel_name += "_BUF";
|
||||
|
|
|
@ -45,7 +45,7 @@ int ReshapeOpenCLKernel::Init() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
std::set<std::string> build_options;
|
||||
std::string source = reshape_source;
|
||||
|
|
|
@ -107,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() {
|
|||
MS_LOG(EXCEPTION) << "Init `Softmax` kernel failed: Unsupported axis: " << parameter_->axis_;
|
||||
}
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
runtime_->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
if (!is_image_out_) {
|
||||
out_mem_type_ = OpenCLMemType::BUF;
|
||||
|
|
|
@ -50,7 +50,7 @@ int ToFormatOpenCLKernel::Init() {
|
|||
|
||||
this->set_name(kernel_name);
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
std::set<std::string> build_options;
|
||||
std::string source = to_format_source;
|
||||
|
|
|
@ -41,7 +41,7 @@ int TransposeOpenCLKernel::Init() {
|
|||
kernel_name += "_IMG";
|
||||
}
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
|
||||
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
|
||||
#else
|
||||
std::set<std::string> build_options;
|
||||
std::string source = transpose_source;
|
||||
|
|
|
@ -81,7 +81,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
|
|||
void *device_ptr = nullptr;
|
||||
void *image_ptr = nullptr;
|
||||
|
||||
if (svm_capabilities && svm_on_) {
|
||||
if (svm_capabilities) {
|
||||
cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0;
|
||||
flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0;
|
||||
flags = flags | CL_MEM_READ_WRITE;
|
||||
|
@ -280,7 +280,7 @@ void OpenCLAllocator::Clear() {
|
|||
void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
|
||||
auto ocl_runtime = opencl::OpenCLRuntime::GetInstance();
|
||||
auto svm_capabilities = ocl_runtime->GetSVMCapabilities();
|
||||
if (svm_capabilities && svm_on_) {
|
||||
if (svm_capabilities) {
|
||||
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
|
||||
auto it = allocated_list_.find(host_ptr);
|
||||
if (it == allocated_list_.end()) {
|
||||
|
@ -356,8 +356,8 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
|
|||
}
|
||||
}
|
||||
|
||||
MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
|
||||
MEM_TYPE mem_type{MEM_TYPE::BUF};
|
||||
MemType OpenCLAllocator::GetMemType(void *host_ptr) {
|
||||
MemType mem_type{MemType::BUF};
|
||||
Lock();
|
||||
auto it = allocated_list_.find(host_ptr);
|
||||
if (it == allocated_list_.end()) {
|
||||
|
@ -367,9 +367,9 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
|
|||
}
|
||||
MemBuf *mem_buf = it->second;
|
||||
if (mem_buf->img_size.empty()) {
|
||||
mem_type = MEM_TYPE::BUF;
|
||||
mem_type = MemType::BUF;
|
||||
} else {
|
||||
mem_type = MEM_TYPE::IMG;
|
||||
mem_type = MemType::IMG;
|
||||
}
|
||||
UnLock();
|
||||
return mem_type;
|
||||
|
|
|
@ -40,7 +40,7 @@ struct OpenclMemory {
|
|||
OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER};
|
||||
};
|
||||
|
||||
enum class MEM_TYPE : char { BUF, IMG };
|
||||
enum class MemType : char { SVM, BUF, IMG };
|
||||
|
||||
class OpenCLAllocator : public Allocator {
|
||||
public:
|
||||
|
@ -58,7 +58,7 @@ class OpenCLAllocator : public Allocator {
|
|||
void *GetBuffer(void *host_ptr);
|
||||
void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true);
|
||||
int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
|
||||
MEM_TYPE GetMemType(void *host_ptr);
|
||||
MemType GetMemType(void *host_ptr);
|
||||
int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
|
||||
void *Prepare(void *ptr) override {
|
||||
if (ptr != nullptr) {
|
||||
|
@ -86,7 +86,6 @@ class OpenCLAllocator : public Allocator {
|
|||
// 6 is empirical value
|
||||
int shift_factor_ = 6;
|
||||
bool lock_flag_ = false;
|
||||
bool svm_on_{false};
|
||||
};
|
||||
|
||||
} // namespace mindspore::lite::opencl
|
||||
|
|
|
@ -138,27 +138,27 @@ int OpenCLRuntime::Init() {
|
|||
<< max_work_item_sizes_[2];
|
||||
|
||||
gpu_info_ = ParseGpuInfo(device_name, device_version);
|
||||
cl_int err;
|
||||
cl_int ret;
|
||||
#if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120)
|
||||
// create context from glcontext
|
||||
MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
|
||||
cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
|
||||
CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &err);
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
|
||||
}
|
||||
#else
|
||||
MS_LOG(INFO) << "Create common opencl context";
|
||||
// cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
|
||||
// CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
|
||||
// context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &err);
|
||||
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
|
||||
#endif
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
|
@ -170,9 +170,8 @@ int OpenCLRuntime::Init() {
|
|||
auto success = device_->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fp_config);
|
||||
support_fp16_ = CL_SUCCESS == success && fp_config > 0;
|
||||
|
||||
err = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_);
|
||||
svm_capabilities_ = 0;
|
||||
if (err != CL_SUCCESS || svm_capabilities_ == 0) {
|
||||
ret = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_);
|
||||
if (ret != CL_SUCCESS || svm_capabilities_ == 0) {
|
||||
svm_capabilities_ = 0;
|
||||
MS_LOG(INFO) << "SVM capalibilties: "
|
||||
<< "NONE";
|
||||
|
@ -204,16 +203,16 @@ int OpenCLRuntime::Init() {
|
|||
properties |= CL_QUEUE_PROFILING_ENABLE;
|
||||
#endif
|
||||
|
||||
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err);
|
||||
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
allocator_ = std::make_shared<OpenCLAllocator>();
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
std::string flag = "";
|
||||
CreateProgramFromIL(g_program_binary, flag);
|
||||
binary_program_ = CreateProgramFromIL(g_program_binary, flag);
|
||||
#endif
|
||||
init_done_ = true;
|
||||
MS_LOG(INFO) << "OpenCLRuntime init done!";
|
||||
|
@ -317,7 +316,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
|
|||
MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
status = this->BuildProgram(build_options_str, &program);
|
||||
status = this->BuildProgram(build_options_str, program);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << program_name << " build failed!";
|
||||
return RET_ERROR;
|
||||
|
@ -325,53 +324,15 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
|
|||
program_map_.emplace(build_program_key, program);
|
||||
}
|
||||
|
||||
cl_int err;
|
||||
kernel = cl::Kernel(program, kernel_name.c_str(), &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err);
|
||||
cl_int ret;
|
||||
kernel = cl::Kernel(program, kernel_name.c_str(), &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(ret);
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
|
||||
int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global,
|
||||
const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
}
|
||||
MS_ASSERT(local.size() == 0 || local.size() == global.size());
|
||||
std::vector<size_t> internal_global_ws = global;
|
||||
for (size_t i = 0; i < local.size(); ++i) {
|
||||
internal_global_ws[i] = ROUND_UP(global[i], local[i]);
|
||||
}
|
||||
|
||||
MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size();
|
||||
for (size_t i = 0; i < global.size(); i++) {
|
||||
MS_LOG(DEBUG) << "global[" << i << "] = " << global[i];
|
||||
}
|
||||
for (size_t i = 0; i < local.size(); i++) {
|
||||
MS_LOG(DEBUG) << "local[" << i << "] = " << local[i];
|
||||
}
|
||||
|
||||
cl::Event event;
|
||||
cl_int error = CL_SUCCESS;
|
||||
if (local.size() == 0) {
|
||||
error =
|
||||
clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), nullptr, 0, nullptr, nullptr);
|
||||
} else {
|
||||
error = clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), local.data(), 0,
|
||||
nullptr, nullptr);
|
||||
}
|
||||
|
||||
if (error != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(DEBUG) << "RunKernel success!";
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
|
||||
int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
|
||||
const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
|
||||
|
@ -392,9 +353,6 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
|
|||
MS_LOG(DEBUG) << "local[" << i << "] = " << local[i];
|
||||
}
|
||||
|
||||
cl::Event event;
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
cl::NDRange global_range = cl::NullRange;
|
||||
cl::NDRange local_range = cl::NullRange;
|
||||
if (global.size() == 1) {
|
||||
|
@ -417,10 +375,12 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
|
||||
cl::Event event;
|
||||
cl_int ret = CL_SUCCESS;
|
||||
ret = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(ret);
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(DEBUG) << "RunKernel success!";
|
||||
|
@ -486,11 +446,11 @@ bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *pr
|
|||
}
|
||||
|
||||
// build program with build options
|
||||
bool OpenCLRuntime::BuildProgram(const std::string &build_options, cl::Program *program) {
|
||||
cl_int ret = program->build({*device_}, build_options.c_str());
|
||||
bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Program &program) {
|
||||
cl_int ret = program.build({*device_}, build_options.c_str());
|
||||
if (ret != CL_SUCCESS) {
|
||||
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
|
||||
std::string build_log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
|
||||
if (program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
|
||||
std::string build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
|
||||
MS_LOG(ERROR) << "Program build log: " << build_log;
|
||||
}
|
||||
MS_LOG(ERROR) << "Build program failed: " << CLErrorCode(ret);
|
||||
|
@ -525,7 +485,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
|
|||
return cl_ret == CL_SUCCESS;
|
||||
}
|
||||
|
||||
void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, cl::CommandQueue *command_queue,
|
||||
void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
|
||||
bool sync) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
|
@ -534,7 +494,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size,
|
|||
}
|
||||
|
||||
int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
|
||||
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
|
||||
if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
|
||||
return RET_OK;
|
||||
}
|
||||
if (command_queue == nullptr) {
|
||||
|
@ -543,7 +503,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
|
|||
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
|
||||
}
|
||||
|
||||
void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> ®ion,
|
||||
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> ®ion,
|
||||
cl::CommandQueue *command_queue) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
|
@ -555,7 +515,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, c
|
|||
return command_queue->enqueueMapImage(buffer, sync, flags, origin_, region_, &row_pitch, &slice_pitch);
|
||||
}
|
||||
|
||||
int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
|
||||
int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
}
|
||||
|
@ -563,7 +523,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm
|
|||
}
|
||||
|
||||
int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const {
|
||||
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
|
||||
if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
|
||||
return RET_OK;
|
||||
}
|
||||
if (command_queue == nullptr) {
|
||||
|
@ -586,43 +546,55 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
|
|||
|
||||
int OpenCLRuntime::GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id) {
|
||||
size_t max_work_group_size;
|
||||
cl_int err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
|
||||
cl_int ret = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
|
||||
&max_work_group_size, nullptr);
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(err);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(ret);
|
||||
}
|
||||
return static_cast<int>(max_work_group_size);
|
||||
}
|
||||
|
||||
bool OpenCLRuntime::CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name) {
|
||||
cl::Kernel OpenCLRuntime::GetKernelFromBinary(const std::string &kernel_name) {
|
||||
cl_int ret = CL_SUCCESS;
|
||||
kernel = clCreateKernel(il_program_, kernel_name.c_str(), &ret);
|
||||
cl::Kernel kernel = cl::Kernel(binary_program_, kernel_name.c_str(), &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Create kernel with IL failed: " << CLErrorCode(ret);
|
||||
MS_LOG(ERROR) << "Create kernel with binary program failed: " << CLErrorCode(ret);
|
||||
}
|
||||
return ret == CL_SUCCESS;
|
||||
return kernel;
|
||||
}
|
||||
|
||||
// build program with IL
|
||||
bool OpenCLRuntime::CreateProgramFromIL(const std::vector<u_char> program_binary, const std::string flag) {
|
||||
cl::Program OpenCLRuntime::CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag) {
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 210
|
||||
size_t program_length = program_binary.size();
|
||||
cl_int ret = CL_SUCCESS;
|
||||
il_program_ = clCreateProgramWithIL((*context_)(), program_binary.data(), program_length, &ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Create program with IL failed: " << CLErrorCode(ret);
|
||||
return false;
|
||||
cl::Program program = cl::Program(*context_, binary);
|
||||
bool status = BuildProgram(default_build_opts_, program);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << "Build program with IL failed!";
|
||||
}
|
||||
|
||||
ret = clBuildProgram(il_program_, 1, &(*device_)(), flag.c_str(), NULL, NULL);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Build program with IL failed: " << CLErrorCode(ret);
|
||||
}
|
||||
return ret == CL_SUCCESS;
|
||||
return program;
|
||||
#else
|
||||
MS_LOG(ERROR) << "Create program with IL failed! The compute capabitity of device should be 2.1 and higher.";
|
||||
return false;
|
||||
return cl::Program();
|
||||
#endif
|
||||
}
|
||||
|
||||
// build program with binary
|
||||
cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary,
|
||||
const std::string &flag) {
|
||||
cl::Program program = cl::Program(*context_, {*device_}, binary);
|
||||
bool status = BuildProgram(default_build_opts_, program);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << "Build program with binary failed!";
|
||||
}
|
||||
return program;
|
||||
}
|
||||
|
||||
std::vector<std::vector<unsigned char>> OpenCLRuntime::GetProgramBinaries(const cl::Program &program) {
|
||||
cl_int ret = CL_SUCCESS;
|
||||
auto binary = program.getInfo<CL_PROGRAM_BINARIES>(&ret);
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Get program binary failed: " << CLErrorCode(ret);
|
||||
}
|
||||
return binary;
|
||||
}
|
||||
} // namespace mindspore::lite::opencl
|
||||
|
|
|
@ -65,61 +65,68 @@ class OpenCLRuntime {
|
|||
GpuInfo GetGpuInfo();
|
||||
bool GetFp16Enable() const;
|
||||
bool SetFp16Enable(bool enable);
|
||||
const std::vector<size_t> &GetWorkItemSize() { return max_work_item_sizes_; }
|
||||
uint32_t GetImagePitchAlignment() { return image_pitch_align_; }
|
||||
cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; }
|
||||
bool GetSVMEnable() const { return svm_enable_; }
|
||||
void SetSVMEnable(bool enable) { svm_enable_ = enable; }
|
||||
const std::vector<size_t> &GetWorkItemSize() const { return max_work_item_sizes_; }
|
||||
uint32_t GetImagePitchAlignment() const { return image_pitch_align_; }
|
||||
cl_device_svm_capabilities GetSVMCapabilities() const { return svm_enable_ ? svm_capabilities_ : 0; }
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index,
|
||||
const T value) {
|
||||
if (svm_capabilities_) {
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
|
||||
return clSetKernelArgSVMPointer(kernel, index, value);
|
||||
} else {
|
||||
MEM_TYPE mem_type = allocator_->GetMemType(value);
|
||||
if (mem_type == MEM_TYPE::BUF) {
|
||||
typename std::enable_if<std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl::Kernel &kernel, uint32_t index,
|
||||
const T value,
|
||||
const MemType mem_type = MemType::IMG) {
|
||||
switch (mem_type) {
|
||||
case MemType::SVM: {
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
|
||||
return kernel.setArg(index, value);
|
||||
}
|
||||
case MemType::BUF: {
|
||||
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
|
||||
return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)());
|
||||
} else {
|
||||
cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
|
||||
return clSetKernelArg(kernel, index, sizeof((*image)()), &(*image)());
|
||||
return kernel.setArg(index, *buffer);
|
||||
}
|
||||
case MemType::IMG: {
|
||||
cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
|
||||
if (image == nullptr) {
|
||||
MS_LOG(WARNING) << "Can't get Image2D, try to use Buffer. Please confirm the buffer type.";
|
||||
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
|
||||
return kernel.setArg(index, *buffer);
|
||||
}
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
|
||||
return kernel.setArg(index, *image);
|
||||
}
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupport opencl memory type: " << static_cast<int>(mem_type);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<!std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index,
|
||||
const T value) {
|
||||
return clSetKernelArg(kernel, index, sizeof(value), &value);
|
||||
typename std::enable_if<!std::is_pointer<T>::value, cl_int>::type SetKernelArg(
|
||||
cl::Kernel &kernel, uint32_t index, const T value, const MemType mem_type = MemType::IMG) {
|
||||
return kernel.setArg(index, value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int SetKernelArg(cl::Kernel &kernel, uint32_t index, const T &value) {
|
||||
return SetKernelArg(kernel(), index, value);
|
||||
}
|
||||
|
||||
bool CreateProgramFromIL(const std::vector<u_char> program_binary, const std::string flag);
|
||||
bool CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name);
|
||||
cl::Program CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag);
|
||||
cl::Program CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary, const std::string &flag);
|
||||
cl::Kernel GetKernelFromBinary(const std::string &kernel_name);
|
||||
std::vector<std::vector<unsigned char>> GetProgramBinaries(const cl::Program &program);
|
||||
bool LoadSource(const std::string &program_name, const std::string &source);
|
||||
int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name,
|
||||
const std::set<std::string> &build_options);
|
||||
int RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global, const std::vector<size_t> &local,
|
||||
cl::CommandQueue *command_queue);
|
||||
int RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global, const std::vector<size_t> &local,
|
||||
cl::CommandQueue *command_queue);
|
||||
bool CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr,
|
||||
bool sync = false) const;
|
||||
bool CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr,
|
||||
bool sync = false) const;
|
||||
void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
|
||||
void *MapBuffer(const cl::Buffer &buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
|
||||
bool sync = false) const;
|
||||
void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> ®ion,
|
||||
void *MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> ®ion,
|
||||
cl::CommandQueue *command_queue = nullptr) const;
|
||||
int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
|
||||
bool sync = false) const;
|
||||
int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
|
||||
int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
|
||||
int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
|
||||
bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr);
|
||||
|
||||
|
@ -136,7 +143,7 @@ class OpenCLRuntime {
|
|||
GpuInfo ParseGpuInfo(std::string device_name, std::string device_version);
|
||||
|
||||
bool LoadProgram(const std::string &program_name, cl::Program *program);
|
||||
bool BuildProgram(const std::string &build_options, cl::Program *program);
|
||||
bool BuildProgram(const std::string &build_options, const cl::Program &program);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
|
||||
|
@ -146,7 +153,7 @@ class OpenCLRuntime {
|
|||
std::shared_ptr<cl::Device> device_{nullptr};
|
||||
std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
|
||||
std::map<std::string, cl::Program> program_map_{};
|
||||
cl_program il_program_{0};
|
||||
cl::Program binary_program_{0};
|
||||
uint64_t global_memery_cachesize_{0};
|
||||
int max_work_group_size;
|
||||
uint32_t compute_units_{0};
|
||||
|
@ -155,6 +162,7 @@ class OpenCLRuntime {
|
|||
GpuInfo gpu_info_;
|
||||
bool support_fp16_{false};
|
||||
bool fp16_enable_{false};
|
||||
bool svm_enable_{false};
|
||||
cl_device_svm_capabilities svm_capabilities_{0};
|
||||
cl_uint image_pitch_align_{0};
|
||||
std::vector<size_t> max_work_item_sizes_;
|
||||
|
|
|
@ -273,6 +273,15 @@ cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const ch
|
|||
return func(context, count, strings, lengths, errcode_ret);
|
||||
}
|
||||
|
||||
// clCreateProgramWithBinary wrapper, use OpenCLWrapper function.
|
||||
cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *devices_list,
|
||||
const size_t *lengths, const unsigned char **binaries, cl_int *binary_status,
|
||||
cl_int *errcode_ret) {
|
||||
auto func = mindspore::lite::opencl::OpenCLWrapper::GetInstance()->clCreateProgramWithBinary;
|
||||
MS_ASSERT(func != nullptr);
|
||||
return func(context, num_devices, devices_list, lengths, binaries, binary_status, errcode_ret);
|
||||
}
|
||||
|
||||
// clGetProgramInfo wrapper, use OpenCLWrapper function.
|
||||
cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret) {
|
||||
|
|
Loading…
Reference in New Issue