!4938 auto unmap buffer for opencl and clean code

Merge pull request !4938 from liuchao/lite-master
This commit is contained in:
mindspore-ci-bot 2020-08-22 11:49:34 +08:00 committed by Gitee
commit 387f4d445f
14 changed files with 80 additions and 209 deletions

View File

@ -70,7 +70,7 @@ int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_si
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
return 0;
return RET_OK;
}
int ArithmeticOpenCLKernel::Init() {

View File

@ -59,7 +59,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
return RET_OK;
}
int Conv2dTransposeOpenCLKernel::ReSize() { return 0; }
int Conv2dTransposeOpenCLKernel::ReSize() { return RET_OK; }
void Conv2dTransposeOpenCLKernel::PadWeight() {
ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_);

View File

@ -67,10 +67,10 @@ int MatMulOpenCLKernel::Init() {
in_tensors_[0]->SetFormat(schema::Format_NC4);
}
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return 0;
return RET_OK;
}
int MatMulOpenCLKernel::ReSize() { return 0; }
int MatMulOpenCLKernel::ReSize() { return RET_OK; }
void MatMulOpenCLKernel::PadWeight() {
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
@ -147,7 +147,7 @@ int MatMulOpenCLKernel::Run() {
ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCO);
ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
return 0;
return RET_OK;
}
kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,

View File

@ -63,7 +63,7 @@ int ReshapeOpenCLKernel::Init() {
return RET_OK;
}
int ReshapeOpenCLKernel::ReSize() { return 0; }
int ReshapeOpenCLKernel::ReSize() { return RET_OK; }
int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;

View File

@ -64,7 +64,7 @@ int TransposeOpenCLKernel::Init() {
return RET_OK;
}
int TransposeOpenCLKernel::ReSize() { return 0; }
int TransposeOpenCLKernel::ReSize() { return RET_OK; }
int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
size_t im_dst_x, im_dst_y;
@ -100,7 +100,7 @@ int TransposeOpenCLKernel::Run() {
ocl_runtime->SetKernelArg(kernel_, 2, HW);
ocl_runtime->SetKernelArg(kernel_, 3, C);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
return 0;
return RET_OK;
}
kernel::LiteKernel *OpenCLTransposeKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,

View File

@ -19,6 +19,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
namespace mindspore::kernel {
@ -37,15 +38,15 @@ class OpenCLKernel : public LiteKernel {
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
virtual int Init() { return -1; }
virtual int Prepare() { return -1; }
virtual int InferShape() { return -1; }
virtual int ReSize() { return -1; }
virtual int Run() { return -1; }
virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return -1; }
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return -1; }
virtual int Init() { return RET_ERROR; }
virtual int Prepare() { return RET_ERROR; }
virtual int InferShape() { return RET_ERROR; }
virtual int ReSize() { return RET_ERROR; }
virtual int Run() { return RET_ERROR; }
virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return RET_ERROR; }
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; }
virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) {
return -1;
return RET_ERROR;
}
OpenCLMemType GetMemType() { return out_mem_type_; }
void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; }

View File

@ -91,15 +91,15 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
cl::Buffer *buffer = new (std::nothrow)
cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret);
if (buffer == nullptr || ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
UnLock();
MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
return nullptr;
}
device_ptr = static_cast<void *>(buffer);
host_ptr = ocl_runtime->MapBuffer(*buffer, CL_MAP_READ | CL_MAP_WRITE, size);
if (host_ptr == nullptr) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
UnLock();
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
return nullptr;
}
cl::Memory *mem = buffer;
@ -199,12 +199,15 @@ void OpenCLAllocator::Free(void *buf) {
Lock();
auto iter = allocated_list_.find(buf);
if (iter != allocated_list_.end()) {
if (iter->second->map_flags) {
UnmapBuffer(buf);
iter->second->map_flags = false;
}
auto mem_buf = iter->second;
allocated_list_.erase(iter);
free_list_.insert(std::make_pair(mem_buf->size_, mem_buf));
UnLock();
buf = nullptr;
MS_LOG(DEBUG) << "Free a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
MS_LOG(DEBUG) << "Free device buffer. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
<< ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_
<< ", free list size: " << free_list_.size();
return;
@ -291,10 +294,16 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
Lock();
auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
UnLock();
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
return nullptr;
}
if (it->second->map_flags) {
UnLock();
MS_LOG(WARNING) << "Host ptr " << host_ptr << " has mapped";
return host_ptr;
}
MemBuf *mem_buf = it->second;
void *new_host_ptr{nullptr};
if (mem_buf->img_size.empty()) {
@ -307,11 +316,13 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
}
if (new_host_ptr == nullptr) {
UnLock();
MS_LOG(WARNING) << "Map buffer failed, can not found buffer or already mapped, dev_ptr=" << mem_buf->device_ptr_
<< ", host_ptr=" << host_ptr;
UnLock();
return nullptr;
}
mem_buf->map_flags = true;
mem_buf->host_ptr_ = new_host_ptr;
allocated_list_.erase(it);
allocated_list_[new_host_ptr] = mem_buf;
@ -327,16 +338,22 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
return ocl_runtime->UnmapBuffer(host_ptr);
}
return 0;
return RET_OK;
}
auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
return 1;
return RET_ERROR;
}
if (it->second->map_flags) {
it->second->map_flags = false;
cl::Memory *mem =
static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
} else {
MS_LOG(WARNING) << "Host ptr " << host_ptr << " do not mapped";
return RET_OK;
}
cl::Memory *mem =
static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
}
MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
@ -344,8 +361,8 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
Lock();
auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) {
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
UnLock();
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
return mem_type;
}
MemBuf *mem_buf = it->second;
@ -362,8 +379,8 @@ int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector<size_t> *img_size)
Lock();
auto it = allocated_list_.find(host_ptr);
if (it == allocated_list_.end()) {
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
UnLock();
MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
return RET_OK;
}
MemBuf *mem_buf = it->second;

View File

@ -76,6 +76,7 @@ class OpenCLAllocator : public Allocator {
void *host_ptr_;
void *image_ptr_;
std::vector<size_t> img_size;
bool map_flags{false};
};
std::mutex lock;

View File

@ -21,6 +21,9 @@
#include "include/errorcode.h"
namespace mindspore::lite::opencl {
// Performs no work for the given kernels and always returns RET_OK.
int OpenCLExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return RET_OK; }
int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
const session::KernelCallBack &before, const session::KernelCallBack &after) {
@ -71,136 +74,4 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
}
return RET_OK;
}
// Dispatches a layout conversion of `tensor` according to its element data type.
//   tensor      tensor to convert; asserted (MS_ASSERT) to be non-null and 4-dimensional.
//   src_format  forwarded unchanged to the type-specific helper.
//   dst_format  forwarded unchanged to the type-specific helper.
//   trans_dir   forwarded unchanged to the type-specific helper.
// Returns the helper's result, or RET_ERROR when the data type is neither
// kNumberTypeInt8 nor kNumberTypeFloat32.
int OpenCLExecutor::TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
bool trans_dir) {
MS_ASSERT(nullptr != tensor);
MS_ASSERT(4 == tensor->shape().size());
auto data_type = tensor->data_type();
switch (data_type) {
// NOTE(review): the int8 type is routed to the helper named "...Uint8" — confirm this pairing is intended.
case kNumberTypeInt8:
return TransformTensorLayoutUint8(tensor, src_format, dst_format, trans_dir);
case kNumberTypeFloat32:
return TransformTensorLayoutFp32(tensor, src_format, dst_format, trans_dir);
default:
// The message prints the tensor's current format and dst_format; the unsupported data type itself is not logged.
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
<< schema::EnumNameFormat(dst_format);
return RET_ERROR;
}
}
// Selects the float32 layout-conversion helper from `trans_dir` and the member flag is_image2d_out_:
//   trans_dir == true,  is_image2d_out_ == true   -> TransformTensorLayoutToImage
//   trans_dir == true,  is_image2d_out_ == false  -> TransformTensorLayoutToBuffer
//   trans_dir == false, is_image2d_out_ == true   -> TransformTensorLayoutFromImage
//   trans_dir == false, is_image2d_out_ == false  -> TransformTensorLayoutToBuffer
// The tensor (asserted non-null and 4-dimensional) and both formats are passed through unchanged;
// the selected helper's return value is returned.
int OpenCLExecutor::TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format,
schema::Format dst_format, bool trans_dir) {
MS_ASSERT(nullptr != tensor);
MS_ASSERT(nullptr != allocator_);
MS_ASSERT(4 == tensor->shape().size());
if (trans_dir) {
if (is_image2d_out_) {
return TransformTensorLayoutToImage(tensor, src_format, dst_format);
} else {
return TransformTensorLayoutToBuffer(tensor, src_format, dst_format);
}
} else {
if (is_image2d_out_) {
return TransformTensorLayoutFromImage(tensor, src_format, dst_format);
} else {
// Both directions use the same ToBuffer helper when the image2d flag is off.
return TransformTensorLayoutToBuffer(tensor, src_format, dst_format);
}
}
}
// Converts the tensor's data towards `dst_format`.
//   dst_format == Format_NHWC4: when src_format is Format_NHWC, the data is repacked with
//     PackNHWCToNHWC4Fp32 into memory obtained from allocator_ and the tensor is pointed at it;
//     for any other src_format the data is left untouched. In both cases the tensor's format
//     is set to dst_format and RET_OK is returned.
//   dst_format == Format_NHWC: nothing is done; returns RET_OK.
//   any other dst_format: logs an error and returns RET_ERROR.
// Returns RET_ERROR as well when the allocation fails.
int OpenCLExecutor::TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format,
schema::Format dst_format) {
if (dst_format == schema::Format_NHWC4) {
auto *src_data = tensor->Data();
// C4 is the channel count divided by C4NUM, rounded up via UP_DIV.
size_t C4 = UP_DIV(tensor->Channel(), C4NUM);
std::vector<size_t> img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT};
if (src_format == schema::Format_NHWC) {
auto *dst_data = allocator_->Malloc(tensor->Size(), img_size);
if (dst_data == nullptr) {
MS_LOG(ERROR) << "Malloc data failed";
return RET_ERROR;
}
// NOTE(review): the pointer returned by MapBuffer is used without a null check — confirm it cannot fail here.
dst_data = reinterpret_cast<FLOAT_t *>(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true));
PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
// The tensor takes the repacked data; the previous data pointer is handed back to the allocator.
tensor->SetData(dst_data);
allocator_->Free(src_data);
allocator_->UnmapBuffer(dst_data);
}
tensor->SetFormat(dst_format);
return RET_OK;
} else if (dst_format == schema::Format_NHWC) {
return RET_OK;
} else {
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
<< schema::EnumNameFormat(dst_format) << " in float32";
return RET_ERROR;
}
}
// Replaces the tensor's data with an image created by allocator_->CreateImageFromHost.
// Only dst_format == Format_NHWC4 is handled; any other value logs an error and returns RET_ERROR.
// Steps for NHWC4:
//   1. mark the tensor as Format_NHWC4;
//   2. if src_format is Format_NHWC, repack the data with PackNHWCToNHWC4Fp32 into newly
//      allocated memory and free the original data;
//   3. create the image from the (possibly repacked) data, point the tensor at it, and free
//      the data the image was created from.
// Returns RET_OK on completion, RET_ERROR if the intermediate allocation fails.
int OpenCLExecutor::TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format,
schema::Format dst_format) {
if (dst_format == schema::Format_NHWC4) {
// NOTE(review): the format is updated before the conversion runs, so the Malloc-failure
// return below leaves the tensor labelled NHWC4 with its original data — confirm this is acceptable.
tensor->SetFormat(schema::Format_NHWC4);
// convert to nhwc4
auto *src_data = tensor->Data();
// dst_data starts as an alias of src_data so step 3 works when no repacking is needed.
auto *dst_data{src_data};
if (src_format == schema::Format_NHWC) {
dst_data = allocator_->Malloc(tensor->Size());
if (dst_data == nullptr) {
MS_LOG(ERROR) << "Malloc data failed";
return RET_ERROR;
}
// NOTE(review): the pointer returned by MapBuffer is used without a null check.
dst_data = reinterpret_cast<FLOAT_t *>(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true));
PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
tensor->SetData(dst_data);
allocator_->Free(src_data);
allocator_->UnmapBuffer(dst_data);
}
// copy to image2d
src_data = dst_data;
size_t C4 = UP_DIV(tensor->Channel(), C4NUM);
std::vector<size_t> img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT};
// NOTE(review): the result of CreateImageFromHost is stored in the tensor without a null check.
dst_data = allocator_->CreateImageFromHost(src_data, tensor->Size(), img_size);
tensor->SetData(dst_data);
allocator_->Free(src_data);
return RET_OK;
} else {
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
<< schema::EnumNameFormat(dst_format) << " in float32";
return RET_ERROR;
}
}
// Reads the image associated with the tensor's current data into newly allocated memory and
// makes that memory the tensor's data; the previous data pointer is then freed.
// Only dst_format == Format_NHWC is handled; any other value logs an error and returns RET_ERROR.
// src_format is not used by this function. The tensor's format field is not modified here.
int OpenCLExecutor::TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format,
schema::Format dst_format) {
if (dst_format == schema::Format_NHWC) {
auto src_data = tensor->Data();
// NOTE(review): unlike the other conversion helpers, the Malloc result is not checked for nullptr before use.
auto dst_data = allocator_->Malloc(tensor->Size());
cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
std::vector<size_t> img_size;
// NOTE(review): the return value of GetImageSize is ignored and img_size[0]/img_size[1] are
// read unconditionally below — confirm img_size is always filled for this pointer.
allocator_->GetImageSize(src_data, &img_size);
auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1};
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// CL_TRUE is passed as the second argument (presumably the blocking flag of the OpenCL C++
// bindings — verify); the call's status code is discarded.
ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(*out_mem, CL_TRUE, origin, region, 0, 0, dst_data);
tensor->SetData(dst_data);
allocator_->Free(src_data);
return RET_OK;
} else {
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
<< schema::EnumNameFormat(dst_format) << " in float32";
return RET_ERROR;
}
}
// Placeholder for the 8-bit layout conversion: after asserting that the tensor is non-null and
// 4-dimensional, it always logs an "Unsupported layout transform" error and returns RET_ERROR.
// src_format and is_image are not used.
// NOTE(review): the last parameter is named is_image here, while the class declaration of this
// method names it trans_dir — confirm which meaning is intended.
int OpenCLExecutor::TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format,
schema::Format dst_format, bool is_image) {
MS_ASSERT(nullptr != tensor);
MS_ASSERT(4 == tensor->shape().size());
// auto src_format = tensor->GetFormat();
MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
<< schema::EnumNameFormat(dst_format) << " in uint8";
return RET_ERROR;
}
} // namespace mindspore::lite::opencl

View File

@ -27,38 +27,17 @@
namespace mindspore::lite::opencl {
class OpenCLExecutor : Executor {
public:
OpenCLExecutor() : Executor() {
allocator_ = OpenCLRuntime::GetInstance()->GetAllocator();
}
OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); }
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return 0; }
int Prepare(const std::vector<kernel::LiteKernel *> &kernels);
int Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
const session::KernelCallBack &before = nullptr, const session::KernelCallBack &after = nullptr);
protected:
int TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
bool trans_dir = false);
int TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
bool trans_dir = false);
int TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
bool trans_dir = false);
int TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
int TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
int TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
protected:
Context *context = nullptr;
OpenCLAllocator *allocator_;
bool is_image2d_out_{true};
};
} // namespace mindspore::lite::opencl
#endif

View File

@ -20,6 +20,7 @@
#ifdef SHARING_MEM_WITH_OPENGL
#include <EGL/egl.h>
#endif
#include "include/errorcode.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/opencl/opencl_allocator.h"
#ifdef PROGRAM_WITH_IL
@ -80,7 +81,7 @@ int OpenCLRuntime::Init() {
std::unique_lock<std::mutex> lck(g_init_mtx);
if (init_done_) {
return 0;
return RET_OK;
}
MS_LOG(INFO) << "OpenCL version: CL_TARGET_OPENCL_VERSION " << CL_TARGET_OPENCL_VERSION;
MS_LOG(INFO) << "CL_HPP_TARGET_OPENCL_VERSION " << CL_HPP_TARGET_OPENCL_VERSION;
@ -89,7 +90,7 @@ int OpenCLRuntime::Init() {
#ifdef USE_OPENCL_WRAPPER
if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
MS_LOG(ERROR) << "Load OpenCL symbols failed!";
return 1;
return RET_ERROR;
}
#endif // USE_OPENCL_WRAPPER
@ -97,7 +98,7 @@ int OpenCLRuntime::Init() {
cl::Platform::get(&platforms);
if (platforms.size() == 0) {
MS_LOG(ERROR) << "OpenCL Platform not found!";
return 1;
return RET_ERROR;
}
// search GPU
@ -119,7 +120,7 @@ int OpenCLRuntime::Init() {
// not found, return error code.
if (devices.size() == 0) {
MS_LOG(ERROR) << "OpenCL Device not found!";
return 1;
return RET_ERROR;
}
device_ = std::make_shared<cl::Device>();
@ -158,7 +159,7 @@ int OpenCLRuntime::Init() {
#endif
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err);
return 1;
return RET_ERROR;
}
// get cache size, compute units and frequency.
@ -206,7 +207,7 @@ int OpenCLRuntime::Init() {
default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &err);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err);
return 1;
return RET_ERROR;
}
allocator_ = std::make_shared<OpenCLAllocator>();
@ -217,7 +218,7 @@ int OpenCLRuntime::Init() {
init_done_ = true;
MS_LOG(INFO) << "OpenCLRuntime init done!";
return 0;
return RET_OK;
}
OpenCLRuntime::~OpenCLRuntime() {
@ -314,12 +315,12 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
auto status = this->LoadProgram(program_name, &program);
if (!status) {
MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
return 1;
return RET_ERROR;
}
status = this->BuildProgram(build_options_str, &program);
if (!status) {
MS_LOG(ERROR) << program_name << " build failed!";
return 1;
return RET_ERROR;
}
program_map_.emplace(build_program_key, program);
}
@ -328,9 +329,9 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
kernel = cl::Kernel(program, kernel_name.c_str(), &err);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err);
return 1;
return RET_ERROR;
}
return 0;
return RET_OK;
}
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
@ -365,10 +366,10 @@ int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t>
if (error != CL_SUCCESS) {
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
return 1;
return RET_ERROR;
}
MS_LOG(DEBUG) << "RunKernel success!";
return 0;
return RET_OK;
}
// Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
@ -413,14 +414,14 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
}
} else {
MS_LOG(ERROR) << "Not supported NDRange!";
return 1;
return RET_ERROR;
}
err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event);
if (err != CL_SUCCESS) {
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err);
return 1;
return RET_ERROR;
}
MS_LOG(DEBUG) << "RunKernel success!";
#if MS_OPENCL_PROFILE
@ -432,7 +433,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
double nanoSeconds = time_end - time_start;
MS_LOG(INFO) << "OpenCl Execution time is: " << nanoSeconds / 1000000.0 << "ms";
#endif
return 0;
return RET_OK;
}
// get gpu divce type
@ -534,7 +535,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size,
int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
return 0;
return RET_OK;
}
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
@ -563,7 +564,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm
int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const {
if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
return 0;
return RET_OK;
}
if (command_queue == nullptr) {
command_queue = default_command_queue_.get();
@ -578,7 +579,7 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
cl_int ret = command_queue->finish();
if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Command queue sync failed: " << CLErrorCode(ret);
return 1;
return RET_ERROR;
}
return ret == CL_SUCCESS;
}

View File

@ -41,11 +41,14 @@ static const std::vector<std::string> g_opencl_library_paths = {
"/system/lib64/libOpenCL.so",
#else
// Qualcomm Adreno
"/system/vendor/lib/libOpenCL.so", "/system/lib/libOpenCL.so",
"/system/vendor/lib/libOpenCL.so",
"/system/lib/libOpenCL.so",
// Mali
"/system/vendor/lib/egl/libGLES_mali.so", "/system/lib/egl/libGLES_mali.so",
"/system/vendor/lib/egl/libGLES_mali.so",
"/system/lib/egl/libGLES_mali.so",
// other
"/system/vendor/lib/libPVROCL.so", "/data/data/org.pocl.libs/files/lib/libpocl.so"
"/system/vendor/lib/libPVROCL.so",
"/data/data/org.pocl.libs/files/lib/libpocl.so"
#endif
"libOpenCL.so",
"libGLES_mali.so",
@ -680,4 +683,3 @@ cl_int clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint index, const void *hos
#endif
#endif // USE_OPENCL_WRAPPER

View File

@ -237,4 +237,3 @@ class OpenCLWrapper {
} // namespace mindspore::lite::opencl
#endif // USE_OPENCL_WRAPPER
#endif // MINDSPORE_LITE_SRC_OPENCL_WRAPPER_H_

View File

@ -179,13 +179,13 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
memcpy(data_c_ocl, outputs[0]->Data(), sizeof(float) * element_num);
// ocl_runtime->SyncCommandQueue();
LogData(data_a, 10, "Data A : ");
LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
LogData(data_c_cpu, 10, "Expect compute : ");
LogData(outputs[0]->Data(), 10, "OpenCL compute : ");
bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num);
MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
EXPECT_EQ(true, cmp);
// free
delete[] data_a;