forked from mindspore-Ecosystem/mindspore
!4938 auto unmap buffer for opencl and clean code
Merge pull request !4938 from liuchao/lite-master
This commit is contained in:
commit
387f4d445f
|
@ -70,7 +70,7 @@ int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_si
|
|||
img_size->clear();
|
||||
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
|
||||
*img_size = vec;
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ArithmeticOpenCLKernel::Init() {
|
||||
|
|
|
@ -59,7 +59,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Conv2dTransposeOpenCLKernel::ReSize() { return 0; }
|
||||
int Conv2dTransposeOpenCLKernel::ReSize() { return RET_OK; }
|
||||
|
||||
void Conv2dTransposeOpenCLKernel::PadWeight() {
|
||||
ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_);
|
||||
|
|
|
@ -67,10 +67,10 @@ int MatMulOpenCLKernel::Init() {
|
|||
in_tensors_[0]->SetFormat(schema::Format_NC4);
|
||||
}
|
||||
MS_LOG(DEBUG) << kernel_name << " Init Done!";
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int MatMulOpenCLKernel::ReSize() { return 0; }
|
||||
int MatMulOpenCLKernel::ReSize() { return RET_OK; }
|
||||
|
||||
void MatMulOpenCLKernel::PadWeight() {
|
||||
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
|
||||
|
@ -147,7 +147,7 @@ int MatMulOpenCLKernel::Run() {
|
|||
ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCO);
|
||||
ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
|
||||
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
|
|
|
@ -63,7 +63,7 @@ int ReshapeOpenCLKernel::Init() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ReshapeOpenCLKernel::ReSize() { return 0; }
|
||||
int ReshapeOpenCLKernel::ReSize() { return RET_OK; }
|
||||
|
||||
int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
|
||||
size_t im_dst_x, im_dst_y;
|
||||
|
|
|
@ -64,7 +64,7 @@ int TransposeOpenCLKernel::Init() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int TransposeOpenCLKernel::ReSize() { return 0; }
|
||||
int TransposeOpenCLKernel::ReSize() { return RET_OK; }
|
||||
|
||||
int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
|
||||
size_t im_dst_x, im_dst_y;
|
||||
|
@ -100,7 +100,7 @@ int TransposeOpenCLKernel::Run() {
|
|||
ocl_runtime->SetKernelArg(kernel_, 2, HW);
|
||||
ocl_runtime->SetKernelArg(kernel_, 3, C);
|
||||
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *OpenCLTransposeKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
|
@ -37,15 +38,15 @@ class OpenCLKernel : public LiteKernel {
|
|||
const std::vector<lite::tensor::Tensor *> &outputs)
|
||||
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
|
||||
|
||||
virtual int Init() { return -1; }
|
||||
virtual int Prepare() { return -1; }
|
||||
virtual int InferShape() { return -1; }
|
||||
virtual int ReSize() { return -1; }
|
||||
virtual int Run() { return -1; }
|
||||
virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return -1; }
|
||||
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return -1; }
|
||||
virtual int Init() { return RET_ERROR; }
|
||||
virtual int Prepare() { return RET_ERROR; }
|
||||
virtual int InferShape() { return RET_ERROR; }
|
||||
virtual int ReSize() { return RET_ERROR; }
|
||||
virtual int Run() { return RET_ERROR; }
|
||||
virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return RET_ERROR; }
|
||||
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; }
|
||||
virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) {
|
||||
return -1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
OpenCLMemType GetMemType() { return out_mem_type_; }
|
||||
void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; }
|
||||
|
|
|
@ -91,15 +91,15 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
|
|||
cl::Buffer *buffer = new (std::nothrow)
|
||||
cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret);
|
||||
if (buffer == nullptr || ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
|
||||
UnLock();
|
||||
MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
|
||||
return nullptr;
|
||||
}
|
||||
device_ptr = static_cast<void *>(buffer);
|
||||
host_ptr = ocl_runtime->MapBuffer(*buffer, CL_MAP_READ | CL_MAP_WRITE, size);
|
||||
if (host_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
|
||||
UnLock();
|
||||
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
|
||||
return nullptr;
|
||||
}
|
||||
cl::Memory *mem = buffer;
|
||||
|
@ -199,12 +199,15 @@ void OpenCLAllocator::Free(void *buf) {
|
|||
Lock();
|
||||
auto iter = allocated_list_.find(buf);
|
||||
if (iter != allocated_list_.end()) {
|
||||
if (iter->second->map_flags) {
|
||||
UnmapBuffer(buf);
|
||||
iter->second->map_flags = false;
|
||||
}
|
||||
auto mem_buf = iter->second;
|
||||
allocated_list_.erase(iter);
|
||||
free_list_.insert(std::make_pair(mem_buf->size_, mem_buf));
|
||||
UnLock();
|
||||
buf = nullptr;
|
||||
MS_LOG(DEBUG) << "Free a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
|
||||
MS_LOG(DEBUG) << "Free device buffer. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
|
||||
<< ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_
|
||||
<< ", free list size: " << free_list_.size();
|
||||
return;
|
||||
|
@ -291,10 +294,16 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
|
|||
Lock();
|
||||
auto it = allocated_list_.find(host_ptr);
|
||||
if (it == allocated_list_.end()) {
|
||||
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
|
||||
UnLock();
|
||||
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (it->second->map_flags) {
|
||||
UnLock();
|
||||
MS_LOG(WARNING) << "Host ptr " << host_ptr << " has mapped";
|
||||
return host_ptr;
|
||||
}
|
||||
MemBuf *mem_buf = it->second;
|
||||
void *new_host_ptr{nullptr};
|
||||
if (mem_buf->img_size.empty()) {
|
||||
|
@ -307,11 +316,13 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
|
|||
new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
|
||||
}
|
||||
if (new_host_ptr == nullptr) {
|
||||
UnLock();
|
||||
MS_LOG(WARNING) << "Map buffer failed, can not found buffer or already mapped, dev_ptr=" << mem_buf->device_ptr_
|
||||
<< ", host_ptr=" << host_ptr;
|
||||
UnLock();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
mem_buf->map_flags = true;
|
||||
mem_buf->host_ptr_ = new_host_ptr;
|
||||
allocated_list_.erase(it);
|
||||
allocated_list_[new_host_ptr] = mem_buf;
|
||||
|
@ -327,16 +338,22 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
|
|||
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
|
||||
return ocl_runtime->UnmapBuffer(host_ptr);
|
||||
}
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
auto it = allocated_list_.find(host_ptr);
|
||||
if (it == allocated_list_.end()) {
|
||||
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (it->second->map_flags) {
|
||||
it->second->map_flags = false;
|
||||
cl::Memory *mem =
|
||||
static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
|
||||
return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Host ptr " << host_ptr << " do not mapped";
|
||||
return RET_OK;
|
||||
}
|
||||
cl::Memory *mem =
|
||||
static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
|
||||
return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
|
||||
}
|
||||
|
||||
MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
|
||||
|
@ -344,8 +361,8 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
|
|||
Lock();
|
||||
auto it = allocated_list_.find(host_ptr);
|
||||
if (it == allocated_list_.end()) {
|
||||
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
|
||||
UnLock();
|
||||
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
|
||||
return mem_type;
|
||||
}
|
||||
MemBuf *mem_buf = it->second;
|
||||
|
@ -362,8 +379,8 @@ int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector<size_t> *img_size)
|
|||
Lock();
|
||||
auto it = allocated_list_.find(host_ptr);
|
||||
if (it == allocated_list_.end()) {
|
||||
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
|
||||
UnLock();
|
||||
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
|
||||
return RET_OK;
|
||||
}
|
||||
MemBuf *mem_buf = it->second;
|
||||
|
|
|
@ -76,6 +76,7 @@ class OpenCLAllocator : public Allocator {
|
|||
void *host_ptr_;
|
||||
void *image_ptr_;
|
||||
std::vector<size_t> img_size;
|
||||
bool map_flags{false};
|
||||
};
|
||||
|
||||
std::mutex lock;
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::lite::opencl {
|
||||
|
||||
int OpenCLExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return RET_OK; }
|
||||
|
||||
int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
|
||||
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
|
||||
const session::KernelCallBack &before, const session::KernelCallBack &after) {
|
||||
|
@ -71,136 +74,4 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int OpenCLExecutor::TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
|
||||
bool trans_dir) {
|
||||
MS_ASSERT(nullptr != tensor);
|
||||
MS_ASSERT(4 == tensor->shape().size());
|
||||
auto data_type = tensor->data_type();
|
||||
switch (data_type) {
|
||||
case kNumberTypeInt8:
|
||||
return TransformTensorLayoutUint8(tensor, src_format, dst_format, trans_dir);
|
||||
case kNumberTypeFloat32:
|
||||
return TransformTensorLayoutFp32(tensor, src_format, dst_format, trans_dir);
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
|
||||
<< schema::EnumNameFormat(dst_format);
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
int OpenCLExecutor::TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format,
|
||||
schema::Format dst_format, bool trans_dir) {
|
||||
MS_ASSERT(nullptr != tensor);
|
||||
MS_ASSERT(nullptr != allocator_);
|
||||
MS_ASSERT(4 == tensor->shape().size());
|
||||
if (trans_dir) {
|
||||
if (is_image2d_out_) {
|
||||
return TransformTensorLayoutToImage(tensor, src_format, dst_format);
|
||||
} else {
|
||||
return TransformTensorLayoutToBuffer(tensor, src_format, dst_format);
|
||||
}
|
||||
} else {
|
||||
if (is_image2d_out_) {
|
||||
return TransformTensorLayoutFromImage(tensor, src_format, dst_format);
|
||||
} else {
|
||||
return TransformTensorLayoutToBuffer(tensor, src_format, dst_format);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int OpenCLExecutor::TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format,
|
||||
schema::Format dst_format) {
|
||||
if (dst_format == schema::Format_NHWC4) {
|
||||
auto *src_data = tensor->Data();
|
||||
size_t C4 = UP_DIV(tensor->Channel(), C4NUM);
|
||||
std::vector<size_t> img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT};
|
||||
if (src_format == schema::Format_NHWC) {
|
||||
auto *dst_data = allocator_->Malloc(tensor->Size(), img_size);
|
||||
if (dst_data == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc data failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
dst_data = reinterpret_cast<FLOAT_t *>(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true));
|
||||
PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
|
||||
tensor->SetData(dst_data);
|
||||
allocator_->Free(src_data);
|
||||
allocator_->UnmapBuffer(dst_data);
|
||||
}
|
||||
tensor->SetFormat(dst_format);
|
||||
return RET_OK;
|
||||
} else if (dst_format == schema::Format_NHWC) {
|
||||
return RET_OK;
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
|
||||
<< schema::EnumNameFormat(dst_format) << " in float32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
int OpenCLExecutor::TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format,
|
||||
schema::Format dst_format) {
|
||||
if (dst_format == schema::Format_NHWC4) {
|
||||
tensor->SetFormat(schema::Format_NHWC4);
|
||||
// convert to nhwc4
|
||||
auto *src_data = tensor->Data();
|
||||
auto *dst_data{src_data};
|
||||
if (src_format == schema::Format_NHWC) {
|
||||
dst_data = allocator_->Malloc(tensor->Size());
|
||||
if (dst_data == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc data failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
dst_data = reinterpret_cast<FLOAT_t *>(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true));
|
||||
PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
|
||||
tensor->SetData(dst_data);
|
||||
allocator_->Free(src_data);
|
||||
allocator_->UnmapBuffer(dst_data);
|
||||
}
|
||||
// copy to image2d
|
||||
src_data = dst_data;
|
||||
size_t C4 = UP_DIV(tensor->Channel(), C4NUM);
|
||||
std::vector<size_t> img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT};
|
||||
dst_data = allocator_->CreateImageFromHost(src_data, tensor->Size(), img_size);
|
||||
tensor->SetData(dst_data);
|
||||
allocator_->Free(src_data);
|
||||
return RET_OK;
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
|
||||
<< schema::EnumNameFormat(dst_format) << " in float32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
int OpenCLExecutor::TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format,
|
||||
schema::Format dst_format) {
|
||||
if (dst_format == schema::Format_NHWC) {
|
||||
auto src_data = tensor->Data();
|
||||
auto dst_data = allocator_->Malloc(tensor->Size());
|
||||
cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
|
||||
std::vector<size_t> img_size;
|
||||
allocator_->GetImageSize(src_data, &img_size);
|
||||
auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
|
||||
auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1};
|
||||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||
ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(*out_mem, CL_TRUE, origin, region, 0, 0, dst_data);
|
||||
tensor->SetData(dst_data);
|
||||
allocator_->Free(src_data);
|
||||
return RET_OK;
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
|
||||
<< schema::EnumNameFormat(dst_format) << " in float32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
int OpenCLExecutor::TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format,
|
||||
schema::Format dst_format, bool is_image) {
|
||||
MS_ASSERT(nullptr != tensor);
|
||||
MS_ASSERT(4 == tensor->shape().size());
|
||||
// auto src_format = tensor->GetFormat();
|
||||
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
|
||||
<< schema::EnumNameFormat(dst_format) << " in uint8";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} // namespace mindspore::lite::opencl
|
||||
|
|
|
@ -27,38 +27,17 @@
|
|||
namespace mindspore::lite::opencl {
|
||||
class OpenCLExecutor : Executor {
|
||||
public:
|
||||
OpenCLExecutor() : Executor() {
|
||||
allocator_ = OpenCLRuntime::GetInstance()->GetAllocator();
|
||||
}
|
||||
OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); }
|
||||
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return 0; }
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
|
||||
int Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
|
||||
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
|
||||
const session::KernelCallBack &before = nullptr, const session::KernelCallBack &after = nullptr);
|
||||
|
||||
protected:
|
||||
int TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
|
||||
bool trans_dir = false);
|
||||
|
||||
int TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
|
||||
bool trans_dir = false);
|
||||
|
||||
int TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
|
||||
bool trans_dir = false);
|
||||
|
||||
int TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
|
||||
|
||||
int TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
|
||||
|
||||
int TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
|
||||
|
||||
protected:
|
||||
Context *context = nullptr;
|
||||
OpenCLAllocator *allocator_;
|
||||
bool is_image2d_out_{true};
|
||||
};
|
||||
|
||||
} // namespace mindspore::lite::opencl
|
||||
#endif
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#ifdef SHARING_MEM_WITH_OPENGL
|
||||
#include <EGL/egl.h>
|
||||
#endif
|
||||
#include "include/errorcode.h"
|
||||
#include "src/runtime/kernel/opencl/utils.h"
|
||||
#include "src/runtime/opencl/opencl_allocator.h"
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
|
@ -80,7 +81,7 @@ int OpenCLRuntime::Init() {
|
|||
std::unique_lock<std::mutex> lck(g_init_mtx);
|
||||
|
||||
if (init_done_) {
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
MS_LOG(INFO) << "OpenCL version: CL_TARGET_OPENCL_VERSION " << CL_TARGET_OPENCL_VERSION;
|
||||
MS_LOG(INFO) << "CL_HPP_TARGET_OPENCL_VERSION " << CL_HPP_TARGET_OPENCL_VERSION;
|
||||
|
@ -89,7 +90,7 @@ int OpenCLRuntime::Init() {
|
|||
#ifdef USE_OPENCL_WRAPPER
|
||||
if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
|
||||
MS_LOG(ERROR) << "Load OpenCL symbols failed!";
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
#endif // USE_OPENCL_WRAPPER
|
||||
|
||||
|
@ -97,7 +98,7 @@ int OpenCLRuntime::Init() {
|
|||
cl::Platform::get(&platforms);
|
||||
if (platforms.size() == 0) {
|
||||
MS_LOG(ERROR) << "OpenCL Platform not found!";
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// search GPU
|
||||
|
@ -119,7 +120,7 @@ int OpenCLRuntime::Init() {
|
|||
// not found, return error code.
|
||||
if (devices.size() == 0) {
|
||||
MS_LOG(ERROR) << "OpenCL Device not found!";
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
device_ = std::make_shared<cl::Device>();
|
||||
|
@ -158,7 +159,7 @@ int OpenCLRuntime::Init() {
|
|||
#endif
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err);
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// get cache size, compute units and frequency.
|
||||
|
@ -206,7 +207,7 @@ int OpenCLRuntime::Init() {
|
|||
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err);
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
allocator_ = std::make_shared<OpenCLAllocator>();
|
||||
|
@ -217,7 +218,7 @@ int OpenCLRuntime::Init() {
|
|||
init_done_ = true;
|
||||
MS_LOG(INFO) << "OpenCLRuntime init done!";
|
||||
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
OpenCLRuntime::~OpenCLRuntime() {
|
||||
|
@ -314,12 +315,12 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
|
|||
auto status = this->LoadProgram(program_name, &program);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
status = this->BuildProgram(build_options_str, &program);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << program_name << " build failed!";
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
program_map_.emplace(build_program_key, program);
|
||||
}
|
||||
|
@ -328,9 +329,9 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
|
|||
kernel = cl::Kernel(program, kernel_name.c_str(), &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err);
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
|
||||
|
@ -365,10 +366,10 @@ int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t>
|
|||
|
||||
if (error != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(DEBUG) << "RunKernel success!";
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
|
||||
|
@ -413,14 +414,14 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
|
|||
}
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Not supported NDRange!";
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err);
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(DEBUG) << "RunKernel success!";
|
||||
#if MS_OPENCL_PROFILE
|
||||
|
@ -432,7 +433,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
|
|||
double nanoSeconds = time_end - time_start;
|
||||
MS_LOG(INFO) << "OpenCl Execution time is: " << nanoSeconds / 1000000.0 << "ms";
|
||||
#endif
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// get gpu device type
|
||||
|
@ -534,7 +535,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size,
|
|||
|
||||
int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
|
||||
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
|
@ -563,7 +564,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm
|
|||
|
||||
int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const {
|
||||
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
|
||||
return 0;
|
||||
return RET_OK;
|
||||
}
|
||||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_.get();
|
||||
|
@ -578,7 +579,7 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
|
|||
cl_int ret = command_queue->finish();
|
||||
if (ret != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Command queue sync failed: " << CLErrorCode(ret);
|
||||
return 1;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return ret == CL_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -41,11 +41,14 @@ static const std::vector<std::string> g_opencl_library_paths = {
|
|||
"/system/lib64/libOpenCL.so",
|
||||
#else
|
||||
// Qualcomm Adreno
|
||||
"/system/vendor/lib/libOpenCL.so", "/system/lib/libOpenCL.so",
|
||||
"/system/vendor/lib/libOpenCL.so",
|
||||
"/system/lib/libOpenCL.so",
|
||||
// Mali
|
||||
"/system/vendor/lib/egl/libGLES_mali.so", "/system/lib/egl/libGLES_mali.so",
|
||||
"/system/vendor/lib/egl/libGLES_mali.so",
|
||||
"/system/lib/egl/libGLES_mali.so",
|
||||
// other
|
||||
"/system/vendor/lib/libPVROCL.so", "/data/data/org.pocl.libs/files/lib/libpocl.so"
|
||||
"/system/vendor/lib/libPVROCL.so",
|
||||
"/data/data/org.pocl.libs/files/lib/libpocl.so"
|
||||
#endif
|
||||
"libOpenCL.so",
|
||||
"libGLES_mali.so",
|
||||
|
@ -680,4 +683,3 @@ cl_int clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint index, const void *hos
|
|||
#endif
|
||||
|
||||
#endif // USE_OPENCL_WRAPPER
|
||||
|
||||
|
|
|
@ -237,4 +237,3 @@ class OpenCLWrapper {
|
|||
} // namespace mindspore::lite::opencl
|
||||
#endif // USE_OPENCL_WRAPPER
|
||||
#endif // MINDSPORE_LITE_SRC_OPENCL_WRAPPER_H_
|
||||
|
||||
|
|
|
@ -179,13 +179,13 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
|
|||
|
||||
memcpy(data_c_ocl, outputs[0]->Data(), sizeof(float) * element_num);
|
||||
|
||||
// ocl_runtime->SyncCommandQueue();
|
||||
LogData(data_a, 10, "Data A : ");
|
||||
LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
|
||||
LogData(data_c_cpu, 10, "Expect compute : ");
|
||||
LogData(outputs[0]->Data(), 10, "OpenCL compute : ");
|
||||
bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num);
|
||||
MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
|
||||
EXPECT_EQ(true, cmp);
|
||||
|
||||
// free
|
||||
delete[] data_a;
|
||||
|
|
Loading…
Reference in New Issue