!4994 add create program from binary interface for opencl

Merge pull request !4994 from liuchao/master
This commit is contained in:
mindspore-ci-bot 2020-08-24 16:25:44 +08:00 committed by Gitee
commit 3454a808c7
14 changed files with 132 additions and 147 deletions

View File

@ -103,10 +103,7 @@ int ArithmeticOpenCLKernel::Init() {
lite::STATUS error_code = RET_OK;
#ifdef PROGRAM_WITH_IL
bool ret = runtime_->CreateKernelFromIL(kernel_(), kernel_name);
if (!ret) {
error_code = RET_ERROR;
}
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
if (out_mem_type_ == OpenCLMemType::IMG) {
kernel_name += "_IMG";

View File

@ -42,7 +42,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
std::string kernel_name = "conv2d_transpose2x2";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
std::string source = conv2d_transpose2x2_source;
std::set<std::string> build_options;

View File

@ -64,7 +64,7 @@ int DepthwiseConv2dOpenCLKernel::Init() {
kernel_name += "_1x1";
}
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
std::string program_name = "DepthwiseConv2d";
std::set<std::string> build_options;

View File

@ -36,7 +36,7 @@ int MatMulOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = matmul_source;

View File

@ -62,7 +62,7 @@ int PoolingOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
if (out_mem_type_ == OpenCLMemType::BUF) {
kernel_name += "_BUF";

View File

@ -45,7 +45,7 @@ int ReshapeOpenCLKernel::Init() {
return RET_ERROR;
}
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = reshape_source;

View File

@ -107,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() {
MS_LOG(EXCEPTION) << "Init `Softmax` kernel failed: Unsupported axis: " << parameter_->axis_;
}
#ifdef PROGRAM_WITH_IL
runtime_->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
if (!is_image_out_) {
out_mem_type_ = OpenCLMemType::BUF;

View File

@ -50,7 +50,7 @@ int ToFormatOpenCLKernel::Init() {
this->set_name(kernel_name);
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = to_format_source;

View File

@ -41,7 +41,7 @@ int TransposeOpenCLKernel::Init() {
kernel_name += "_IMG";
}
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = transpose_source;

View File

@ -81,7 +81,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
void *device_ptr = nullptr;
void *image_ptr = nullptr;
if (svm_capabilities && svm_on_) {
if (svm_capabilities) {
cl_svm_mem_flags flags = (svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0;
flags |= (svm_capabilities & CL_DEVICE_SVM_ATOMICS) ? CL_MEM_SVM_ATOMICS : 0;
flags = flags | CL_MEM_READ_WRITE;
@ -280,7 +280,7 @@ void OpenCLAllocator::Clear() {
void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
auto ocl_runtime = opencl::OpenCLRuntime::GetInstance();
auto svm_capabilities = ocl_runtime->GetSVMCapabilities();
if (svm_capabilities && svm_on_) {
if (svm_capabilities) {
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) {
@ -356,8 +356,8 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
}
}
MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
MEM_TYPE mem_type{MEM_TYPE::BUF};
MemType OpenCLAllocator::GetMemType(void *host_ptr) {
MemType mem_type{MemType::BUF};
Lock();
auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) {
@ -367,9 +367,9 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
}
MemBuf *mem_buf = it->second;
if (mem_buf->img_size.empty()) {
mem_type = MEM_TYPE::BUF;
mem_type = MemType::BUF;
} else {
mem_type = MEM_TYPE::IMG;
mem_type = MemType::IMG;
}
UnLock();
return mem_type;

View File

@ -40,7 +40,7 @@ struct OpenclMemory {
OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER};
};
enum class MEM_TYPE : char { BUF, IMG };
enum class MemType : char { SVM, BUF, IMG };
class OpenCLAllocator : public Allocator {
public:
@ -58,7 +58,7 @@ class OpenCLAllocator : public Allocator {
void *GetBuffer(void *host_ptr);
void *MapBuffer(void *host_ptr, int flags, void *command_queue = nullptr, bool sync = true);
int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
MEM_TYPE GetMemType(void *host_ptr);
MemType GetMemType(void *host_ptr);
int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
void *Prepare(void *ptr) override {
if (ptr != nullptr) {
@ -86,7 +86,6 @@ class OpenCLAllocator : public Allocator {
// 6 is empirical value
int shift_factor_ = 6;
bool lock_flag_ = false;
bool svm_on_{false};
};
} // namespace mindspore::lite::opencl

View File

@ -138,27 +138,27 @@ int OpenCLRuntime::Init() {
<< max_work_item_sizes_[2];
gpu_info_ = ParseGpuInfo(device_name, device_version);
cl_int err;
cl_int ret;
#if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120)
// create context from glcontext
MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
if (err != CL_SUCCESS) {
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &err);
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
}
#else
MS_LOG(INFO) << "Create common opencl context";
// cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
// CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
// context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &err);
context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
#endif
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
return RET_ERROR;
}
@ -170,9 +170,8 @@ int OpenCLRuntime::Init() {
auto success = device_->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fp_config);
support_fp16_ = CL_SUCCESS == success && fp_config > 0;
err = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_);
svm_capabilities_ = 0;
if (err != CL_SUCCESS || svm_capabilities_ == 0) {
ret = device_->getInfo(CL_DEVICE_SVM_CAPABILITIES, &svm_capabilities_);
if (ret != CL_SUCCESS || svm_capabilities_ == 0) {
svm_capabilities_ = 0;
MS_LOG(INFO) << "SVM capalibilties: "
<< "NONE";
@ -204,16 +203,16 @@ int OpenCLRuntime::Init() {
properties |= CL_QUEUE_PROFILING_ENABLE;
#endif
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &err);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err);
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
return RET_ERROR;
}
allocator_ = std::make_shared<OpenCLAllocator>();
#ifdef PROGRAM_WITH_IL
std::string flag = "";
CreateProgramFromIL(g_program_binary, flag);
binary_program_ = CreateProgramFromIL(g_program_binary, flag);
#endif
init_done_ = true;
MS_LOG(INFO) << "OpenCLRuntime init done!";
@ -317,7 +316,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
return RET_ERROR;
}
status = this->BuildProgram(build_options_str, &program);
status = this->BuildProgram(build_options_str, program);
if (!status) {
MS_LOG(ERROR) << program_name << " build failed!";
return RET_ERROR;
@ -325,53 +324,15 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
program_map_.emplace(build_program_key, program);
}
cl_int err;
kernel = cl::Kernel(program, kernel_name.c_str(), &err);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err);
cl_int ret;
kernel = cl::Kernel(program, kernel_name.c_str(), &ret);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(ret);
return RET_ERROR;
}
return RET_OK;
}
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global,
const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
}
MS_ASSERT(local.size() == 0 || local.size() == global.size());
std::vector<size_t> internal_global_ws = global;
for (size_t i = 0; i < local.size(); ++i) {
internal_global_ws[i] = ROUND_UP(global[i], local[i]);
}
MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size();
for (size_t i = 0; i < global.size(); i++) {
MS_LOG(DEBUG) << "global[" << i << "] = " << global[i];
}
for (size_t i = 0; i < local.size(); i++) {
MS_LOG(DEBUG) << "local[" << i << "] = " << local[i];
}
cl::Event event;
cl_int error = CL_SUCCESS;
if (local.size() == 0) {
error =
clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), nullptr, 0, nullptr, nullptr);
} else {
error = clEnqueueNDRangeKernel((*command_queue)(), kernel, global.size(), 0, global.data(), local.data(), 0,
nullptr, nullptr);
}
if (error != CL_SUCCESS) {
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
return RET_ERROR;
}
MS_LOG(DEBUG) << "RunKernel success!";
return RET_OK;
}
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
@ -392,9 +353,6 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
MS_LOG(DEBUG) << "local[" << i << "] = " << local[i];
}
cl::Event event;
cl_int err = CL_SUCCESS;
cl::NDRange global_range = cl::NullRange;
cl::NDRange local_range = cl::NullRange;
if (global.size() == 1) {
@ -417,10 +375,12 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
return RET_ERROR;
}
err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
cl::Event event;
cl_int ret = CL_SUCCESS;
ret = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(ret);
return RET_ERROR;
}
MS_LOG(DEBUG) << "RunKernel success!";
@ -486,11 +446,11 @@ bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *pr
}
// build program with build options
bool OpenCLRuntime::BuildProgram(const std::string &build_options, cl::Program *program) {
cl_int ret = program->build({*device_}, build_options.c_str());
bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Program &program) {
cl_int ret = program.build({*device_}, build_options.c_str());
if (ret != CL_SUCCESS) {
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
std::string build_log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
if (program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
std::string build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
MS_LOG(ERROR) << "Program build log: " << build_log;
}
MS_LOG(ERROR) << "Build program failed: " << CLErrorCode(ret);
@ -525,7 +485,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
return cl_ret == CL_SUCCESS;
}
void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, cl::CommandQueue *command_queue,
void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
bool sync) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
@ -534,7 +494,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size,
}
int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
return RET_OK;
}
if (command_queue == nullptr) {
@ -543,7 +503,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
}
void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
cl::CommandQueue *command_queue) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
@ -555,7 +515,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D buffer, bool sync, int flags, c
return command_queue->enqueueMapImage(buffer, sync, flags, origin_, region_, &row_pitch, &slice_pitch);
}
int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
}
@ -563,7 +523,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm
}
int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const {
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
return RET_OK;
}
if (command_queue == nullptr) {
@ -586,43 +546,55 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
int OpenCLRuntime::GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id) {
size_t max_work_group_size;
cl_int err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
cl_int ret = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
&max_work_group_size, nullptr);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(err);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Failed to get info CL_KERNEL_WORK_GROUP_SIZE " << CLErrorCode(ret);
}
return static_cast<int>(max_work_group_size);
}
bool OpenCLRuntime::CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name) {
cl::Kernel OpenCLRuntime::GetKernelFromBinary(const std::string &kernel_name) {
cl_int ret = CL_SUCCESS;
kernel = clCreateKernel(il_program_, kernel_name.c_str(), &ret);
cl::Kernel kernel = cl::Kernel(binary_program_, kernel_name.c_str(), &ret);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create kernel with IL failed: " << CLErrorCode(ret);
MS_LOG(ERROR) << "Create kernel with binary program failed: " << CLErrorCode(ret);
}
return ret == CL_SUCCESS;
return kernel;
}
// build program with IL
bool OpenCLRuntime::CreateProgramFromIL(const std::vector<u_char> program_binary, const std::string flag) {
cl::Program OpenCLRuntime::CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag) {
#if CL_HPP_TARGET_OPENCL_VERSION >= 210
size_t program_length = program_binary.size();
cl_int ret = CL_SUCCESS;
il_program_ = clCreateProgramWithIL((*context_)(), program_binary.data(), program_length, &ret);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create program with IL failed: " << CLErrorCode(ret);
return false;
cl::Program program = cl::Program(*context_, binary);
bool status = BuildProgram(default_build_opts_, program);
if (!status) {
MS_LOG(ERROR) << "Build program with IL failed!";
}
ret = clBuildProgram(il_program_, 1, &(*device_)(), flag.c_str(), NULL, NULL);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Build program with IL failed: " << CLErrorCode(ret);
}
return ret == CL_SUCCESS;
return program;
#else
MS_LOG(ERROR) << "Create program with IL failed! The compute capabitity of device should be 2.1 and higher.";
return false;
return cl::Program();
#endif
}
// build program with binary
cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary,
const std::string &flag) {
cl::Program program = cl::Program(*context_, {*device_}, binary);
bool status = BuildProgram(default_build_opts_, program);
if (!status) {
MS_LOG(ERROR) << "Build program with binary failed!";
}
return program;
}
std::vector<std::vector<unsigned char>> OpenCLRuntime::GetProgramBinaries(const cl::Program &program) {
cl_int ret = CL_SUCCESS;
auto binary = program.getInfo<CL_PROGRAM_BINARIES>(&ret);
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Get program binary failed: " << CLErrorCode(ret);
}
return binary;
}
} // namespace mindspore::lite::opencl

View File

@ -65,61 +65,68 @@ class OpenCLRuntime {
GpuInfo GetGpuInfo();
bool GetFp16Enable() const;
bool SetFp16Enable(bool enable);
const std::vector<size_t> &GetWorkItemSize() { return max_work_item_sizes_; }
uint32_t GetImagePitchAlignment() { return image_pitch_align_; }
cl_device_svm_capabilities GetSVMCapabilities() const { return svm_capabilities_; }
bool GetSVMEnable() const { return svm_enable_; }
void SetSVMEnable(bool enable) { svm_enable_ = enable; }
const std::vector<size_t> &GetWorkItemSize() const { return max_work_item_sizes_; }
uint32_t GetImagePitchAlignment() const { return image_pitch_align_; }
cl_device_svm_capabilities GetSVMCapabilities() const { return svm_enable_ ? svm_capabilities_ : 0; }
template <typename T>
typename std::enable_if<std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index,
const T value) {
if (svm_capabilities_) {
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
return clSetKernelArgSVMPointer(kernel, index, value);
} else {
MEM_TYPE mem_type = allocator_->GetMemType(value);
if (mem_type == MEM_TYPE::BUF) {
typename std::enable_if<std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl::Kernel &kernel, uint32_t index,
const T value,
const MemType mem_type = MemType::IMG) {
switch (mem_type) {
case MemType::SVM: {
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
return kernel.setArg(index, value);
}
case MemType::BUF: {
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
return clSetKernelArg(kernel, index, sizeof((*buffer)()), &(*buffer)());
} else {
cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
return clSetKernelArg(kernel, index, sizeof((*image)()), &(*image)());
return kernel.setArg(index, *buffer);
}
case MemType::IMG: {
cl::Image2D *image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(value));
if (image == nullptr) {
MS_LOG(WARNING) << "Can't get Image2D, try to use Buffer. Please confirm the buffer type.";
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
return kernel.setArg(index, *buffer);
}
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value;
return kernel.setArg(index, *image);
}
default:
MS_LOG(ERROR) << "Unsupport opencl memory type: " << static_cast<int>(mem_type);
}
}
template <typename T>
typename std::enable_if<!std::is_pointer<T>::value, cl_int>::type SetKernelArg(cl_kernel &kernel, uint32_t index,
const T value) {
return clSetKernelArg(kernel, index, sizeof(value), &value);
typename std::enable_if<!std::is_pointer<T>::value, cl_int>::type SetKernelArg(
cl::Kernel &kernel, uint32_t index, const T value, const MemType mem_type = MemType::IMG) {
return kernel.setArg(index, value);
}
template <typename T>
int SetKernelArg(cl::Kernel &kernel, uint32_t index, const T &value) {
return SetKernelArg(kernel(), index, value);
}
bool CreateProgramFromIL(const std::vector<u_char> program_binary, const std::string flag);
bool CreateKernelFromIL(cl_kernel &kernel, const std::string kernel_name);
cl::Program CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag);
cl::Program CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary, const std::string &flag);
cl::Kernel GetKernelFromBinary(const std::string &kernel_name);
std::vector<std::vector<unsigned char>> GetProgramBinaries(const cl::Program &program);
bool LoadSource(const std::string &program_name, const std::string &source);
int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name,
const std::set<std::string> &build_options);
int RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global, const std::vector<size_t> &local,
cl::CommandQueue *command_queue);
int RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global, const std::vector<size_t> &local,
cl::CommandQueue *command_queue);
bool CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const;
bool CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const;
void *MapBuffer(const cl::Buffer buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
void *MapBuffer(const cl::Buffer &buffer, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const;
void *MapBuffer(const cl::Image2D buffer, bool sync, int flags, const std::vector<size_t> &region,
void *MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
cl::CommandQueue *command_queue = nullptr) const;
int MapBuffer(void *host_ptr, int map_flags, size_t size, cl::CommandQueue *command_queue = nullptr,
bool sync = false) const;
int UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const;
bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr);
@ -136,7 +143,7 @@ class OpenCLRuntime {
GpuInfo ParseGpuInfo(std::string device_name, std::string device_version);
bool LoadProgram(const std::string &program_name, cl::Program *program);
bool BuildProgram(const std::string &build_options, cl::Program *program);
bool BuildProgram(const std::string &build_options, const cl::Program &program);
private:
static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
@ -146,7 +153,7 @@ class OpenCLRuntime {
std::shared_ptr<cl::Device> device_{nullptr};
std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
std::map<std::string, cl::Program> program_map_{};
cl_program il_program_{0};
cl::Program binary_program_{0};
uint64_t global_memery_cachesize_{0};
int max_work_group_size;
uint32_t compute_units_{0};
@ -155,6 +162,7 @@ class OpenCLRuntime {
GpuInfo gpu_info_;
bool support_fp16_{false};
bool fp16_enable_{false};
bool svm_enable_{false};
cl_device_svm_capabilities svm_capabilities_{0};
cl_uint image_pitch_align_{0};
std::vector<size_t> max_work_item_sizes_;

View File

@ -273,6 +273,15 @@ cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const ch
return func(context, count, strings, lengths, errcode_ret);
}
// clCreateProgramWithBinary wrapper, use OpenCLWrapper function.
cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id *devices_list,
const size_t *lengths, const unsigned char **binaries, cl_int *binary_status,
cl_int *errcode_ret) {
auto func = mindspore::lite::opencl::OpenCLWrapper::GetInstance()->clCreateProgramWithBinary;
MS_ASSERT(func != nullptr);
return func(context, num_devices, devices_list, lengths, binaries, binary_status, errcode_ret);
}
// clGetProgramInfo wrapper, use OpenCLWrapper function.
cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {