fix security check

This commit is contained in:
gongdaguo 2021-08-03 16:20:09 +08:00
parent 0c707cd888
commit fe438fae9c
82 changed files with 2222 additions and 632 deletions

View File

@ -108,12 +108,15 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi
} }
if (*image == nullptr) { if (*image == nullptr) {
delete *buffer; delete *buffer;
*buffer = nullptr;
MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
return nullptr; return nullptr;
} }
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
delete *buffer; delete *buffer;
delete *image; delete *image;
*buffer = nullptr;
*image = nullptr;
MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
return nullptr; return nullptr;
} }
@ -125,6 +128,8 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi
if (host_ptr == nullptr) { if (host_ptr == nullptr) {
delete *buffer; delete *buffer;
delete *image; delete *image;
*buffer = nullptr;
*image = nullptr;
MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr;
return nullptr; return nullptr;
} }

View File

@ -210,6 +210,7 @@ int OpenCLRuntime::InitQueue(std::vector<cl::Platform> *platforms) {
#endif #endif
if (context_ == nullptr || ret != CL_SUCCESS) { if (context_ == nullptr || ret != CL_SUCCESS) {
delete device_; delete device_;
device_ = nullptr;
MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret); MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
return RET_ERROR; return RET_ERROR;
} }
@ -218,6 +219,8 @@ int OpenCLRuntime::InitQueue(std::vector<cl::Platform> *platforms) {
if (default_command_queue_ == nullptr || ret != CL_SUCCESS) { if (default_command_queue_ == nullptr || ret != CL_SUCCESS) {
delete device_; delete device_;
delete context_; delete context_;
device_ = nullptr;
context_ = nullptr;
MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret); MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
return RET_ERROR; return RET_ERROR;
} }
@ -227,6 +230,9 @@ int OpenCLRuntime::InitQueue(std::vector<cl::Platform> *platforms) {
delete device_; delete device_;
delete context_; delete context_;
delete default_command_queue_; delete default_command_queue_;
device_ = nullptr;
context_ = nullptr;
default_command_queue_ = nullptr;
MS_LOG(ERROR) << "Profiling command Queue create failed: " << CLErrorCode(ret); MS_LOG(ERROR) << "Profiling command Queue create failed: " << CLErrorCode(ret);
return RET_ERROR; return RET_ERROR;
} }
@ -291,6 +297,10 @@ int OpenCLRuntime::Init() {
delete context_; delete context_;
delete default_command_queue_; delete default_command_queue_;
delete profiling_command_queue_; delete profiling_command_queue_;
device_ = nullptr;
context_ = nullptr;
default_command_queue_ = nullptr;
profiling_command_queue_ = nullptr;
MS_LOG(ERROR) << "Command OpenCL allocator failed!"; MS_LOG(ERROR) << "Command OpenCL allocator failed!";
return RET_ERROR; return RET_ERROR;
} }
@ -305,7 +315,9 @@ int OpenCLRuntime::Uninit() {
if (init_state_ != InitSuccess) { if (init_state_ != InitSuccess) {
return RET_OK; return RET_OK;
} }
StoreCache(); if (StoreCache() != RET_OK) {
MS_LOG(ERROR) << "StoreCache failed!";
}
program_map_.clear(); program_map_.clear();
delete default_command_queue_; delete default_command_queue_;
delete profiling_command_queue_; delete profiling_command_queue_;
@ -574,12 +586,15 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size,
int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
return RET_OK; return RET_ERROR;
} }
if (command_queue == nullptr) { if (command_queue == nullptr) {
command_queue = default_command_queue_; command_queue = default_command_queue_;
} }
return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr); if (clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr) != CL_SUCCESS) {
return RET_ERROR;
}
return RET_OK;
} }
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region, void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
@ -720,17 +735,17 @@ void OpenCLRuntime::LoadCache() {
MS_LOG(INFO) << "Init opencl cache success"; MS_LOG(INFO) << "Init opencl cache success";
} }
void OpenCLRuntime::StoreCache() { int OpenCLRuntime::StoreCache() {
if (!enable_cache_) { if (!enable_cache_) {
return; return RET_OK;
} }
if (!flush_cache_) { if (!flush_cache_) {
return; return RET_OK;
} }
auto fbb = std::make_unique<flatbuffers::FlatBufferBuilder>(); auto fbb = std::make_unique<flatbuffers::FlatBufferBuilder>();
if (fbb == nullptr) { if (fbb == nullptr) {
MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail"; MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail";
return; return RET_ERROR;
} }
std::vector<flatbuffers::Offset<schema::ProgramBinary>> program_binarys; std::vector<flatbuffers::Offset<schema::ProgramBinary>> program_binarys;
for (const auto &kv : program_map_) { for (const auto &kv : program_map_) {
@ -753,8 +768,12 @@ void OpenCLRuntime::StoreCache() {
auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data); auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data);
fbb->Finish(gpu_cache); fbb->Finish(gpu_cache);
uint8_t *buf = fbb->GetBufferPointer(); uint8_t *buf = fbb->GetBufferPointer();
WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize()); if (WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize()) != RET_OK) {
MS_LOG(ERROR) << "WriteToBin failed.";
return RET_ERROR;
}
MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize(); MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize();
return RET_OK;
} }
cl::Buffer *OpenCLRuntime::CreateSharedMemoryBuffer(size_t size, void *host_ptr) { cl::Buffer *OpenCLRuntime::CreateSharedMemoryBuffer(size_t size, void *host_ptr) {

View File

@ -203,7 +203,7 @@ class OpenCLRuntime {
// for cache // for cache
private: private:
void LoadCache(); void LoadCache();
void StoreCache(); int StoreCache();
#ifdef MS_OPENCL_BINARY_CACHE #ifdef MS_OPENCL_BINARY_CACHE
bool enable_cache_{true}; bool enable_cache_{true};
#else #else

View File

@ -65,37 +65,53 @@ int ActivationOpenCLKernel::CheckSpecs() {
int ActivationOpenCLKernel::Prepare() { int ActivationOpenCLKernel::Prepare() {
outShape = GpuTensorInfo(out_tensors_[0]); outShape = GpuTensorInfo(out_tensors_[0]);
std::string source = activation_source; std::string source = activation_source;
std::string program_name = "Activation"; const std::string program_name = "Activation";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
} }
std::string kernel_name = GetActTypeString(type_); const std::string kernel_name = GetActTypeString(type_);
auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext);
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " init Done!"; MS_LOG(DEBUG) << kernel_name << " init Done!";
return RET_OK; return RET_OK;
} }
void ActivationOpenCLKernel::SetConstArgs() { int ActivationOpenCLKernel::SetConstArgs() {
int arg_idx = 2; int arg_idx = 2;
cl_int2 image_size = {static_cast<int>(outShape.width), static_cast<int>(outShape.height)}; cl_int2 image_size = {static_cast<int>(outShape.width), static_cast<int>(outShape.height)};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (type_ == ActivationType_LEAKY_RELU) { if (type_ == ActivationType_LEAKY_RELU) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
if (type_ == ActivationType_SIGMOID) { if (type_ == ActivationType_SIGMOID) {
int c4 = outShape.Slice; int c4 = outShape.Slice;
int last_c4 = outShape.C % 4 == 0 ? 4 : outShape.C % 4; int last_c4 = outShape.C % 4 == 0 ? 4 : outShape.C % 4;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
return RET_OK;
} }
void ActivationOpenCLKernel::SetGlobalLocal() { void ActivationOpenCLKernel::SetGlobalLocal() {
@ -107,8 +123,14 @@ void ActivationOpenCLKernel::SetGlobalLocal() {
int ActivationOpenCLKernel::Run() { int ActivationOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel:" << this->name() << " fail."; MS_LOG(ERROR) << "Run kernel:" << this->name() << " fail.";

View File

@ -35,7 +35,7 @@ class ActivationOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -16,6 +16,7 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <functional> #include <functional>
#include <algorithm>
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/kernel/argminmax.h" #include "src/runtime/kernel/opencl/kernel/argminmax.h"
@ -58,19 +59,41 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void ArgMinMaxOpenCLKernel::SetConstArgs() { int ArgMinMaxOpenCLKernel::SetConstArgs() {
auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
cl_int4 in_shape{static_cast<int>(im_in_.N), static_cast<int>(im_in_.H), static_cast<int>(im_in_.W), cl_int4 in_shape{static_cast<int>(im_in_.N), static_cast<int>(im_in_.H), static_cast<int>(im_in_.W),
static_cast<int>(im_in_.C)}; static_cast<int>(im_in_.C)};
cl_int4 flags = {param->out_value_, param->get_max_, param->axis_, param->topk_}; cl_int4 flags = {param->out_value_, param->get_max_, param->axis_, param->topk_};
int arg_cnt = 2; int arg_cnt = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_); }
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void ArgMinMaxOpenCLKernel::SetGlobalLocal() { void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
@ -134,14 +157,22 @@ int ArgMinMaxOpenCLKernel::InitWeights() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
int dtype_size = ocl_runtime_->GetFp16Enable() ? sizeof(int16_t) : sizeof(float); int dtype_size = ocl_runtime_->GetFp16Enable() ? sizeof(int16_t) : sizeof(float);
buff_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * dtype_size, lite::opencl::MemType::BUF); buff_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * dtype_size, lite::opencl::MemType::BUF);
if (buff_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
ids_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * sizeof(int32_t), lite::opencl::MemType::BUF); ids_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * sizeof(int32_t), lite::opencl::MemType::BUF);
if (ids_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
int ArgMinMaxOpenCLKernel::Prepare() { int ArgMinMaxOpenCLKernel::Prepare() {
std::string kernel_name = "argminmax"; const std::string kernel_name = "argminmax";
std::string source = argminmax_source; std::string source = argminmax_source;
std::string program_name = "argminmax"; const std::string program_name = "argminmax";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -162,16 +193,28 @@ int ArgMinMaxOpenCLKernel::Prepare() {
InitWeights(); InitWeights();
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
int ArgMinMaxOpenCLKernel::Run() { int ArgMinMaxOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class ArgMinMaxOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int InitWeights() override; int InitWeights() override;
int Tune() override { return lite::RET_OK; } int Tune() override { return lite::RET_OK; }

View File

@ -98,6 +98,10 @@ int ArithmeticOpenCLKernel::InitWeights() {
size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT; size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT;
ImageSize img_size{in_shape.width, in_shape.height, dtype}; ImageSize img_size{in_shape.width, in_shape.height, dtype};
auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
if (weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
weight_ptrs_.push_back(weight_ptr_); weight_ptrs_.push_back(weight_ptr_);
} else { } else {
weight_ptrs_.push_back(nullptr); weight_ptrs_.push_back(nullptr);
@ -106,7 +110,7 @@ int ArithmeticOpenCLKernel::InitWeights() {
return RET_OK; return RET_OK;
} }
void ArithmeticOpenCLKernel::SetConstArgs() { int ArithmeticOpenCLKernel::SetConstArgs() {
int arg_idx = 3; int arg_idx = 3;
if (!element_flag_) { if (!element_flag_) {
cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W), cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W),
@ -121,16 +125,38 @@ void ArithmeticOpenCLKernel::SetConstArgs() {
} else if (in0_shape_.C != 1 && in1_shape_.C == 1) { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) {
broadcastC_flag = 2; // BroadCast C4 in input1 broadcastC_flag = 2; // BroadCast C4 in input1
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])}; cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_); }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int ArithmeticOpenCLKernel::Prepare() { int ArithmeticOpenCLKernel::Prepare() {
@ -179,7 +205,7 @@ int ArithmeticOpenCLKernel::Prepare() {
activation_max_ = 6.f; activation_max_ = 6.f;
} }
std::string program_name = "Arithmetic"; const std::string program_name = "Arithmetic";
std::string source = arithmetic_source; std::string source = arithmetic_source;
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -196,7 +222,10 @@ int ArithmeticOpenCLKernel::Prepare() {
if (type() != PrimitiveType_BiasAdd) { if (type() != PrimitiveType_BiasAdd) {
InitWeights(); InitWeights();
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; MS_LOG(DEBUG) << kernel_name_ << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -206,10 +235,22 @@ int ArithmeticOpenCLKernel::Run() {
auto input_0_ptr = weight_ptrs_[0] == nullptr ? in_tensors_[0]->data_c() : weight_ptrs_[0]; auto input_0_ptr = weight_ptrs_[0] == nullptr ? in_tensors_[0]->data_c() : weight_ptrs_[0];
auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1]; auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1];
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); return RET_ERROR;
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -35,7 +35,7 @@ class ArithmeticOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -86,7 +86,7 @@ int ArithmeticSelfOpenCLKernel::Prepare() {
kernel_name += std::string(schema::EnumNamePrimitiveType(type())) + "_NHWC4"; kernel_name += std::string(schema::EnumNamePrimitiveType(type())) + "_NHWC4";
} }
MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; MS_LOG(DEBUG) << "execute kernel name : " << kernel_name;
std::string program_name = "ArithmeticSelf"; const std::string program_name = "ArithmeticSelf";
if (!ocl_runtime_->LoadSource(program_name, arithmeticself_source)) { if (!ocl_runtime_->LoadSource(program_name, arithmeticself_source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -98,15 +98,27 @@ int ArithmeticSelfOpenCLKernel::Prepare() {
return ret; return ret;
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
int ArithmeticSelfOpenCLKernel::Run() { int ArithmeticSelfOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -47,7 +47,13 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); } int SetConstArgs() override {
if (ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
}
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -55,7 +55,7 @@ int BatchToSpaceNDOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void BatchToSpaceNDOpenCLKernel::SetConstArgs() { int BatchToSpaceNDOpenCLKernel::SetConstArgs() {
auto param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_); auto param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_);
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
@ -66,10 +66,23 @@ void BatchToSpaceNDOpenCLKernel::SetConstArgs() {
cl_int4 paddings = {param->crops_[0], param->crops_[1], param->crops_[2], param->crops_[3]}; cl_int4 paddings = {param->crops_[0], param->crops_[1], param->crops_[2], param->crops_[3]};
int arg_cnt = 2; int arg_cnt = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() {
@ -82,9 +95,9 @@ void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() {
} }
int BatchToSpaceNDOpenCLKernel::Prepare() { int BatchToSpaceNDOpenCLKernel::Prepare() {
std::string kernel_name = "batch_to_space_nd_NHWC4"; const std::string kernel_name = "batch_to_space_nd_NHWC4";
std::string source = batch_to_space_nd_source; std::string source = batch_to_space_nd_source;
std::string program_name = "batch_to_space_nd"; const std::string program_name = "batch_to_space_nd";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -96,16 +109,28 @@ int BatchToSpaceNDOpenCLKernel::Prepare() {
return ret; return ret;
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
int BatchToSpaceNDOpenCLKernel::Run() { int BatchToSpaceNDOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class BatchToSpaceNDOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override { return lite::RET_OK; } int Tune() override { return lite::RET_OK; }

View File

@ -59,15 +59,25 @@ void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
local->push_back(z); local->push_back(z);
} }
void BatchNormOpenCLKernel::SetConstArgs() { int BatchNormOpenCLKernel::SetConstArgs() {
int arg_cn = 6; int arg_cn = 6;
auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_); auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_);
auto input0_shape = in_tensors_.at(0)->shape(); auto input0_shape = in_tensors_.at(0)->shape();
cl_int4 input_shape_ = {input0_shape.at(0), input0_shape.at(1), input0_shape.at(2), cl_int4 input_shape_ = {input0_shape.at(0), input0_shape.at(1), input0_shape.at(2),
UP_DIV(input0_shape.at(3), C4NUM)}; UP_DIV(input0_shape.at(3), C4NUM)};
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void BatchNormOpenCLKernel::SetGlobalLocal() { void BatchNormOpenCLKernel::SetGlobalLocal() {
@ -83,6 +93,41 @@ void BatchNormOpenCLKernel::SetGlobalLocal() {
OpenCLKernel::AlignGlobalLocal(global_size_, local_size_); OpenCLKernel::AlignGlobalLocal(global_size_, local_size_);
} }
int BatchNormOpenCLKernel::UnmapBuffer() {
auto allocator = ocl_runtime_->GetAllocator();
if (allocator->UnmapBuffer(scale_) != RET_OK) {
return RET_ERROR;
}
if (allocator->UnmapBuffer(offset_) != RET_OK) {
return RET_ERROR;
}
if (allocator->UnmapBuffer(mean_) != RET_OK) {
return RET_ERROR;
}
if (allocator->UnmapBuffer(variance_) != RET_OK) {
return RET_ERROR;
}
return RET_OK;
}
int BatchNormOpenCLKernel::MapBuffer() {
auto allocator = ocl_runtime_->GetAllocator();
if (allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true) == nullptr) {
return RET_ERROR;
}
if (allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true) == nullptr) {
return RET_ERROR;
}
if (allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true) == nullptr) {
return RET_ERROR;
}
if (allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true) == nullptr) {
return RET_ERROR;
}
return RET_OK;
}
int BatchNormOpenCLKernel::Initweight() { int BatchNormOpenCLKernel::Initweight() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
GpuTensorInfo img_info(in_tensors_.at(1)); GpuTensorInfo img_info(in_tensors_.at(1));
@ -90,15 +135,30 @@ int BatchNormOpenCLKernel::Initweight() {
size_t weight_size = img_info.OriginSize; size_t weight_size = img_info.OriginSize;
// allocated memory for weight and init value // allocated memory for weight and init value
scale_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); scale_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
if (scale_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
offset_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); offset_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
if (offset_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
mean_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); mean_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
if (mean_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
variance_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); variance_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
if (variance_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true); if (MapBuffer() != RET_OK) {
allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true); MS_LOG(ERROR) << "Map Buffer failed.";
allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true); return RET_ERROR;
allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true); }
memset(scale_, 1, weight_size); memset(scale_, 1, weight_size);
memset(offset_, 0x00, weight_size); memset(offset_, 0x00, weight_size);
memset(mean_, 0x00, weight_size); memset(mean_, 0x00, weight_size);
@ -153,18 +213,18 @@ int BatchNormOpenCLKernel::Initweight() {
memcpy(variance_, in_tensors_.at(4)->data_c(), weight_size); memcpy(variance_, in_tensors_.at(4)->data_c(), weight_size);
} }
} }
allocator->UnmapBuffer(scale_); if (UnmapBuffer() != RET_OK) {
allocator->UnmapBuffer(offset_); MS_LOG(ERROR) << "UnmapBuffer failed.";
allocator->UnmapBuffer(mean_); return RET_ERROR;
allocator->UnmapBuffer(variance_); }
return RET_OK; return RET_OK;
} }
int BatchNormOpenCLKernel::Prepare() { int BatchNormOpenCLKernel::Prepare() {
use_fp16_enable_ = ocl_runtime_->GetFp16Enable(); use_fp16_enable_ = ocl_runtime_->GetFp16Enable();
std::string kernel_name = "Batch_normalization_NHWC4"; const std::string kernel_name = "Batch_normalization_NHWC4";
std::string source = batchnorm_source; std::string source = batchnorm_source;
std::string program_name = "Batch_normalization"; const std::string program_name = "Batch_normalization";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -181,7 +241,10 @@ int BatchNormOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Initweight failed "; MS_LOG(ERROR) << "Initweight failed ";
return RET_ERROR; return RET_ERROR;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
@ -190,13 +253,34 @@ int BatchNormOpenCLKernel::Prepare() {
int BatchNormOpenCLKernel::Run() { int BatchNormOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
int arg_cn = 0; int arg_cn = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF); // scale MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF); // offset return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean } // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF); // variance if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
} // scale
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // offset
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // mean
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // variance
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // out tensor
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,11 +32,13 @@ class BatchNormOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:
int Initweight(); int Initweight();
int UnmapBuffer();
int MapBuffer();
private: private:
bool use_fp16_enable_{false}; bool use_fp16_enable_{false};

View File

@ -52,9 +52,13 @@ int CastOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void CastOpenCLKernel::SetConstArgs() { int CastOpenCLKernel::SetConstArgs() {
cl_int2 shape = {static_cast<int>(shape_.width), static_cast<int>(shape_.height)}; cl_int2 shape = {static_cast<int>(shape_.width), static_cast<int>(shape_.height)};
ocl_runtime_->SetKernelArg(kernel_, 2, shape); if (ocl_runtime_->SetKernelArg(kernel_, 2, shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void CastOpenCLKernel::SetGlobalLocal() { void CastOpenCLKernel::SetGlobalLocal() {
@ -68,8 +72,8 @@ int CastOpenCLKernel::Prepare() {
{kNumberTypeFloat32, "fp32"}, {kNumberTypeFloat32, "fp32"},
{kNumberTypeFloat16, "fp16"}, {kNumberTypeFloat16, "fp16"},
}; };
std::string program_name = "Cast"; const std::string program_name = "Cast";
std::string kernel_name = const std::string kernel_name =
"Cast_" + dtype_names[in_tensors_.front()->data_type()] + "_to_" + dtype_names[out_tensors_.front()->data_type()]; "Cast_" + dtype_names[in_tensors_.front()->data_type()] + "_to_" + dtype_names[out_tensors_.front()->data_type()];
if (!ocl_runtime_->LoadSource(program_name, cast_source)) { if (!ocl_runtime_->LoadSource(program_name, cast_source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -80,16 +84,28 @@ int CastOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
} }
int CastOpenCLKernel::Run() { int CastOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -31,7 +31,7 @@ class CastOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -38,7 +38,10 @@ int ConcatOpenCLKernel::RunAxis0() {
auto *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data)); auto *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data));
for (int i = 0; i < in_tensors_.size(); i++) { for (int i = 0; i < in_tensors_.size(); i++) {
auto src_data = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i); auto src_data = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i);
allocator_->GetImageSize(src_data, &img_size); if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
MS_LOG(ERROR) << "GetImageSize failed.";
return RET_ERROR;
}
auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1}; auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
auto *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); auto *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@ -107,7 +110,7 @@ int ConcatOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void ConcatOpenCLKernel::SetConstArgs() { int ConcatOpenCLKernel::SetConstArgs() {
GpuTensorInfo img_info(out_tensors_[0]); GpuTensorInfo img_info(out_tensors_[0]);
size_t dtype = ocl_runtime_->GetFp16Enable() ? sizeof(cl_half) : sizeof(cl_float); size_t dtype = ocl_runtime_->GetFp16Enable() ? sizeof(cl_half) : sizeof(cl_float);
stride_w = img_info.RowPitch() / dtype; stride_w = img_info.RowPitch() / dtype;
@ -124,9 +127,15 @@ void ConcatOpenCLKernel::SetConstArgs() {
temp.s[j] = in_tensor->shape()[j]; temp.s[j] = in_tensor->shape()[j];
} }
Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w);
} else { } else {
for (auto &in_tensor : in_tensors_) { for (auto &in_tensor : in_tensors_) {
cl_int4 temp = {}; cl_int4 temp = {};
@ -135,11 +144,18 @@ void ConcatOpenCLKernel::SetConstArgs() {
} }
Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1);
in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
} }
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void ConcatOpenCLKernel::SetGlobalLocal() { void ConcatOpenCLKernel::SetGlobalLocal() {
@ -190,6 +206,10 @@ int ConcatOpenCLKernel::ConvertWeightToTensor() {
} }
ImageSize img_size{in_shape.width, in_shape.height, dtype}; ImageSize img_size{in_shape.width, in_shape.height, dtype};
auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
if (weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
weight_ptrs_.push_back(weight_ptr_); weight_ptrs_.push_back(weight_ptr_);
} else { } else {
weight_ptrs_.push_back(nullptr); weight_ptrs_.push_back(nullptr);
@ -222,7 +242,7 @@ int ConcatOpenCLKernel::Prepare() {
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
std::string source = concat_source; std::string source = concat_source;
std::string program_name = "Concat"; const std::string program_name = "Concat";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -234,7 +254,10 @@ int ConcatOpenCLKernel::Prepare() {
return ret; return ret;
} }
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
} }
@ -247,14 +270,27 @@ int ConcatOpenCLKernel::Run() {
int arg_cn = 0; int arg_cn = 0;
for (int i = 0; i < in_tensors_.size(); ++i) { for (int i = 0; i < in_tensors_.size(); ++i) {
auto input_ptr = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i); auto input_ptr = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
if (axis_ == 3 && !Align_) { if (axis_ == 3 && !Align_) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) !=
} else { CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else {
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
} }
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
return RET_OK; return RET_OK;
} }

View File

@ -31,7 +31,7 @@ class ConcatOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -108,7 +108,10 @@ int Conv2DOpenCLKernel::Prepare() {
return ret; return ret;
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -142,7 +145,7 @@ void Conv2DOpenCLKernel::InitAttrs() {
int Conv2DOpenCLKernel::BuildKernel() { int Conv2DOpenCLKernel::BuildKernel() {
SetBlockSize(); SetBlockSize();
std::string program_name = "conv2d"; const std::string program_name = "conv2d";
std::stringstream kernel_name; std::stringstream kernel_name;
kernel_name << "Conv2D_H" << block_size_.H << "W" << block_size_.W << "C" << block_size_.C; kernel_name << "Conv2D_H" << block_size_.H << "W" << block_size_.W << "C" << block_size_.C;
if (filter_type_ == MemType::IMG) { if (filter_type_ == MemType::IMG) {
@ -245,9 +248,11 @@ void Conv2DOpenCLKernel::SetMaliFp16BlockSize(int task_size_per_cu, bool w_kerne
} }
int Conv2DOpenCLKernel::InitWeights() { int Conv2DOpenCLKernel::InitWeights() {
InitFilter(); if (InitFilter() != RET_OK) {
return RET_ERROR;
}
if (has_bias_) { if (has_bias_) {
InitBias(); return InitBias();
} }
return RET_OK; return RET_OK;
} }
@ -300,7 +305,7 @@ void ConvertFilter(void *src, void *dst, TypeId src_dtype, TypeId dst_dtype, Fil
} }
} }
void Conv2DOpenCLKernel::InitFilter() { int Conv2DOpenCLKernel::InitFilter() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
// allocate opencl memory: buffer or image2d // allocate opencl memory: buffer or image2d
@ -312,9 +317,17 @@ void Conv2DOpenCLKernel::InitFilter() {
size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
size = width * height * CO_TILE * sizeof_FLT_; size = width * height * CO_TILE * sizeof_FLT_;
packed_filter_ = allocator->Malloc({width, height, dtype}); packed_filter_ = allocator->Malloc({width, height, dtype});
if (packed_filter_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
} else { } else {
size = UP_DIV(CO_SLICES_, Ogroup) * KH_ * KW_ * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; size = UP_DIV(CO_SLICES_, Ogroup) * KH_ * KW_ * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_;
packed_filter_ = allocator->Malloc(size, lite::opencl::MemType::BUF); packed_filter_ = allocator->Malloc(size, lite::opencl::MemType::BUF);
if (packed_filter_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
} }
// rearrange filter // rearrange filter
@ -333,15 +346,22 @@ void Conv2DOpenCLKernel::InitFilter() {
if (filter_type_ == MemType::IMG) { if (filter_type_ == MemType::IMG) {
ocl_runtime_->WriteImage(packed_filter_, tmp.data()); ocl_runtime_->WriteImage(packed_filter_, tmp.data());
} else { } else {
allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memcpy(packed_filter_, tmp.data(), size); memcpy(packed_filter_, tmp.data(), size);
allocator->UnmapBuffer(packed_filter_); if (allocator->UnmapBuffer(packed_filter_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
} }
FreeStoredData(stored_filter_); FreeStoredData(stored_filter_);
return RET_OK;
} }
void Conv2DOpenCLKernel::InitBias() { int Conv2DOpenCLKernel::InitBias() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
// align bias from C to C4 // align bias from C to C4
@ -349,8 +369,15 @@ void Conv2DOpenCLKernel::InitBias() {
void *src_data = stored_bias_ == nullptr ? bias_tensor->data_c() : stored_bias_; void *src_data = stored_bias_ == nullptr ? bias_tensor->data_c() : stored_bias_;
size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_; size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_;
packed_bias_ = allocator->Malloc(packed_bias_size, lite::opencl::MemType::BUF); packed_bias_ = allocator->Malloc(packed_bias_size, lite::opencl::MemType::BUF);
if (packed_bias_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true); if (allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(packed_bias_, 0x00, packed_bias_size); memset(packed_bias_, 0x00, packed_bias_size);
if (bias_tensor->data_type() == kNumberTypeFloat16) { if (bias_tensor->data_type() == kNumberTypeFloat16) {
if (use_fp16_) { if (use_fp16_) {
@ -375,11 +402,15 @@ void Conv2DOpenCLKernel::InitBias() {
memcpy(packed_bias_, src_data, CO_ * sizeof_FLT_); memcpy(packed_bias_, src_data, CO_ * sizeof_FLT_);
} }
} }
allocator->UnmapBuffer(packed_bias_); if (allocator->UnmapBuffer(packed_bias_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_bias_); FreeStoredData(stored_bias_);
return RET_OK;
} }
void Conv2DOpenCLKernel::SetConstArgs() { int Conv2DOpenCLKernel::SetConstArgs() {
cl_int4 input_shape = {batch_size_, IH_, IW_, CI_SLICES_}; cl_int4 input_shape = {batch_size_, IH_, IW_, CI_SLICES_};
cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_};
cl_int4 kernel_stride = {KH_, KW_, param_->stride_h_, param_->stride_w_}; cl_int4 kernel_stride = {KH_, KW_, param_->stride_h_, param_->stride_w_};
@ -387,15 +418,43 @@ void Conv2DOpenCLKernel::SetConstArgs() {
cl_int2 dilation = {param_->dilation_h_, param_->dilation_w_}; cl_int2 dilation = {param_->dilation_h_, param_->dilation_w_};
int arg_cn = 2; int arg_cn = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_); }
ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void Conv2DOpenCLKernel::SetGlobalLocal() { void Conv2DOpenCLKernel::SetGlobalLocal() {
@ -429,9 +488,18 @@ void Conv2DOpenCLKernel::SetGlobalLocal() {
int Conv2DOpenCLKernel::Run() { int Conv2DOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -53,7 +53,7 @@ class Conv2DOpenCLKernel : public OpenCLKernel {
int CheckSpecs() override; int CheckSpecs() override;
int Prepare() override; int Prepare() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;
@ -78,8 +78,8 @@ class Conv2DOpenCLKernel : public OpenCLKernel {
protected: protected:
void InitAttrs(); void InitAttrs();
virtual int BuildKernel(); virtual int BuildKernel();
virtual void InitFilter(); virtual int InitFilter();
void InitBias(); int InitBias();
bool use_fp16_{false}; bool use_fp16_{false};
size_t sizeof_FLT_{4}; size_t sizeof_FLT_{4};
ConvParameter *param_{nullptr}; ConvParameter *param_{nullptr};

View File

@ -55,10 +55,10 @@ int Conv2dTransposeOpenCLKernel::CheckSpecs() {
} }
int Conv2dTransposeOpenCLKernel::Prepare() { int Conv2dTransposeOpenCLKernel::Prepare() {
std::string kernel_name = "conv2d_transpose"; const std::string kernel_name = "conv2d_transpose";
enable_fp16_ = ocl_runtime_->GetFp16Enable(); enable_fp16_ = ocl_runtime_->GetFp16Enable();
std::string source = GetActDefines() + conv2d_transpose_source; std::string source = GetActDefines() + conv2d_transpose_source;
std::string program_name = "conv2d_transpose"; const std::string program_name = "conv2d_transpose";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -74,7 +74,10 @@ int Conv2dTransposeOpenCLKernel::Prepare() {
return ret; return ret;
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -94,7 +97,7 @@ void Conv2dTransposeOpenCLKernel::SetGlobalLocal() {
AlignGlobalLocal(global_size_, local_size_); AlignGlobalLocal(global_size_, local_size_);
} }
void Conv2dTransposeOpenCLKernel::SetConstArgs() { int Conv2dTransposeOpenCLKernel::SetConstArgs() {
int arg_cnt = 2; int arg_cnt = 2;
auto *param = reinterpret_cast<ConvParameter *>(op_parameter_); auto *param = reinterpret_cast<ConvParameter *>(op_parameter_);
int ci = in_tensors_[0]->shape()[3]; int ci = in_tensors_[0]->shape()[3];
@ -115,14 +118,39 @@ void Conv2dTransposeOpenCLKernel::SetConstArgs() {
cl_int2 padding = {pad_h, pad_w}; cl_int2 padding = {pad_h, pad_w};
cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), n}; cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), n};
cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), n}; cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), n};
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); }
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast<cl_int>(param->act_type_)); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast<cl_int>(param->act_type_)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int Conv2dTransposeOpenCLKernel::InitWeights() { int Conv2dTransposeOpenCLKernel::InitWeights() {
@ -147,7 +175,15 @@ int Conv2dTransposeOpenCLKernel::InitFilter() {
// IHWO to OHWI4(I)4(O)(converter format is IHWO) // IHWO to OHWI4(I)4(O)(converter format is IHWO)
// init padWeight_(buffer mem) // init padWeight_(buffer mem)
padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size, lite::opencl::MemType::BUF); padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size, lite::opencl::MemType::BUF);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size);
auto origin_weight = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data_c() : stored_weight_; auto origin_weight = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data_c() : stored_weight_;
auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type();
@ -188,7 +224,10 @@ int Conv2dTransposeOpenCLKernel::InitFilter() {
} }
} }
} }
allocator->UnmapBuffer(padWeight_); if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_weight_); FreeStoredData(stored_weight_);
return RET_OK; return RET_OK;
} }
@ -208,7 +247,15 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
} }
ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
bias_ = allocator->Malloc(img_size); bias_ = allocator->Malloc(img_size);
if (bias_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
if (bias_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(bias_, 0x00, div_co * C4NUM * data_size); memset(bias_, 0x00, div_co * C4NUM * data_size);
if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@ -225,7 +272,10 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
memcpy(bias_, src_data, co * data_size); memcpy(bias_, src_data, co * data_size);
} }
} }
allocator->UnmapBuffer(bias_); if (allocator->UnmapBuffer(bias_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_bias_); FreeStoredData(stored_bias_);
return RET_OK; return RET_OK;
} }
@ -233,9 +283,18 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
int Conv2dTransposeOpenCLKernel::Run() { int Conv2dTransposeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_cnt = 0; int arg_cnt = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -34,7 +34,7 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel {
int InitWeights() override; int InitWeights() override;
int InitFilter(); int InitFilter();
int InitBias(); int InitBias();
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int StoreConstData() override; int StoreConstData() override;

View File

@ -73,7 +73,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() {
} else { } else {
block_size_.C = block_size_.H = block_size_.W = 1; block_size_.C = block_size_.H = block_size_.W = 1;
} }
std::string program_name = "DepthwiseConv2d"; const std::string program_name = "DepthwiseConv2d";
std::string source = depthwise_conv2d_source; std::string source = depthwise_conv2d_source;
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -94,7 +94,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() {
return ret; return ret;
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_); MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_);
return RET_OK; return RET_OK;
} }
@ -153,10 +156,12 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype}; ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype};
packed_weight_ = allocator->Malloc(img_size, temp_filter.data()); packed_weight_ = allocator->Malloc(img_size, temp_filter.data());
} else { } else {
packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data()); packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data());
} }
if (packed_weight_ == nullptr) { if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR; return RET_ERROR;
} }
FreeStoredData(stored_weight_); FreeStoredData(stored_weight_);
@ -199,13 +204,15 @@ int DepthwiseConv2dOpenCLKernel::InitBias() {
} }
bias_data_ = allocator->Malloc(bias_size, temp_bias.data()); bias_data_ = allocator->Malloc(bias_size, temp_bias.data());
if (bias_data_ == nullptr) { if (bias_data_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR; return RET_ERROR;
} }
FreeStoredData(stored_bias_); FreeStoredData(stored_bias_);
return RET_OK; return RET_OK;
} }
void DepthwiseConv2dOpenCLKernel::SetConstArgs() { int DepthwiseConv2dOpenCLKernel::SetConstArgs() {
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
auto in_info = GpuTensorInfo(in_tensors_[0]); auto in_info = GpuTensorInfo(in_tensors_[0]);
auto out_info = GpuTensorInfo(out_tensors_[0]); auto out_info = GpuTensorInfo(out_tensors_[0]);
@ -222,16 +229,47 @@ void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N}; cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N};
int arg_cnt = 2; int arg_cnt = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); }
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); }
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() { void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() {
@ -286,9 +324,18 @@ int DepthwiseConv2dOpenCLKernel::StoreConstData() {
int DepthwiseConv2dOpenCLKernel::Run() { int DepthwiseConv2dOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -41,7 +41,7 @@ class DepthwiseConv2dOpenCLKernel : public OpenCLKernel {
int CheckSpecs() override; int CheckSpecs() override;
int InitWeights() override; int InitWeights() override;
int InitBias(); int InitBias();
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int StoreConstData() override; int StoreConstData() override;

View File

@ -35,7 +35,10 @@ int FillOpenCLKernel::RunFill() {
cl_int4 fill_value = {}; cl_int4 fill_value = {};
fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_;
auto src_data = out_tensors_[0]->data_c(); auto src_data = out_tensors_[0]->data_c();
allocator_->GetImageSize(src_data, &img_size); if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
MS_LOG(ERROR) << "GetImageSize failed.";
return RET_ERROR;
}
auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1}; auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@ -59,7 +62,7 @@ int FillOpenCLKernel::RunShape() {
return RET_OK; return RET_OK;
} }
void FillOpenCLKernel::SetConstArgs() {} int FillOpenCLKernel::SetConstArgs() { return RET_OK; }
void FillOpenCLKernel::SetGlobalLocal() {} void FillOpenCLKernel::SetGlobalLocal() {}

View File

@ -31,7 +31,7 @@ class FillOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -98,7 +98,7 @@ int FullConnectionOpenCLKernel::Prepare() {
kernel_name = "FullConnectionWeightVar"; kernel_name = "FullConnectionWeightVar";
} }
std::string source = fullconnection_source; std::string source = fullconnection_source;
std::string program_name = "FullConnection"; const std::string program_name = "FullConnection";
if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + source)) { if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -113,7 +113,10 @@ int FullConnectionOpenCLKernel::Prepare() {
if (ret != RET_OK) { if (ret != RET_OK) {
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
@ -137,7 +140,15 @@ int FullConnectionOpenCLKernel::InitFilter() {
size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
padWeight_ = allocator->Malloc(nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size, padWeight_ = allocator->Malloc(nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size,
lite::opencl::MemType::BUF); lite::opencl::MemType::BUF);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size); memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size);
@ -183,7 +194,10 @@ int FullConnectionOpenCLKernel::InitFilter() {
} }
} }
} }
allocator->UnmapBuffer(padWeight_); if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_weight_); FreeStoredData(stored_weight_);
return RET_OK; return RET_OK;
} }
@ -202,7 +216,15 @@ int FullConnectionOpenCLKernel::InitBias() {
} }
ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
bias_ = allocator->Malloc(img_size); bias_ = allocator->Malloc(img_size);
if (bias_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
if (bias_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(bias_, 0x00, co4 * C4NUM * dtype_size); memset(bias_, 0x00, co4 * C4NUM * dtype_size);
if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@ -218,7 +240,10 @@ int FullConnectionOpenCLKernel::InitBias() {
memcpy(bias_, src_data, CO_ * dtype_size); memcpy(bias_, src_data, CO_ * dtype_size);
} }
} }
allocator->UnmapBuffer(bias_); if (allocator->UnmapBuffer(bias_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_bias_); FreeStoredData(stored_bias_);
return RET_OK; return RET_OK;
} }
@ -231,22 +256,44 @@ void FullConnectionOpenCLKernel::SetGlobalLocal() {
AlignGlobalLocal(global_size_, local_size_); AlignGlobalLocal(global_size_, local_size_);
} }
void FullConnectionOpenCLKernel::SetConstArgs() { int FullConnectionOpenCLKernel::SetConstArgs() {
if (!weight_var_) { if (!weight_var_) {
ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
int arg_count = 3; int arg_count = 3;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
auto intensor_shape = GpuTensorInfo(in_tensors_[0]); auto intensor_shape = GpuTensorInfo(in_tensors_[0]);
int CI4 = CI_remainder_ * intensor_shape.Slice; int CI4 = CI_remainder_ * intensor_shape.Slice;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
auto in_shape_info = GpuTensorInfo(in_tensors_[0]); auto in_shape_info = GpuTensorInfo(in_tensors_[0]);
cl_int2 in_img_shape = {static_cast<int>(in_shape_info.height), static_cast<int>(in_shape_info.width)}; cl_int2 in_img_shape = {static_cast<int>(in_shape_info.height), static_cast<int>(in_shape_info.width)};
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
auto *param = reinterpret_cast<MatMulParameter *>(op_parameter_); auto *param = reinterpret_cast<MatMulParameter *>(op_parameter_);
ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast<cl_int>(param->act_type_)); if (ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast<cl_int>(param->act_type_)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int FullConnectionOpenCLKernel::StoreConstData() { int FullConnectionOpenCLKernel::StoreConstData() {
@ -270,12 +317,24 @@ int FullConnectionOpenCLKernel::StoreConstData() {
int FullConnectionOpenCLKernel::Run() { int FullConnectionOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_count = 0; int arg_count = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
if (weight_var_) { return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (weight_var_) {
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
} }
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
return RET_OK; return RET_OK;
} }

View File

@ -31,7 +31,7 @@ class FullConnectionOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override { return lite::RET_OK; } int Tune() override { return lite::RET_OK; }
int StoreConstData() override; int StoreConstData() override;

View File

@ -164,8 +164,8 @@ bool IsEltwiseAndOperatorSupported(LiteKernel *node) {
int FusionEltwiseOpenCLKernel::Prepare() { int FusionEltwiseOpenCLKernel::Prepare() {
std::string source = Codegen(); std::string source = Codegen();
std::string program_name = "FusionEltwise\n" + source; const std::string program_name = "FusionEltwise\n" + source;
std::string kernel_name = "FusionEltwise"; const std::string kernel_name = "FusionEltwise";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -183,7 +183,10 @@ int FusionEltwiseOpenCLKernel::Prepare() {
} }
InitWeights(); InitWeights();
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -217,7 +220,14 @@ int FusionEltwiseOpenCLKernel::InitWeights() {
size_t num = tensor_info.ElementsNum; size_t num = tensor_info.ElementsNum;
size_t size = tensor_info.Image2DSize; size_t size = tensor_info.Image2DSize;
void *buffer = allocator->Malloc(size, lite::opencl::MemType::BUF); void *buffer = allocator->Malloc(size, lite::opencl::MemType::BUF);
allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true); if (buffer == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
if (allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(buffer, 0x00, size); memset(buffer, 0x00, size);
if (tensor->data_type() == kNumberTypeFloat16) { if (tensor->data_type() == kNumberTypeFloat16) {
if (use_fp16) { if (use_fp16) {
@ -232,7 +242,10 @@ int FusionEltwiseOpenCLKernel::InitWeights() {
CopyNumber<float32_t, float32_t>(buffer, tensor->data_c(), num); CopyNumber<float32_t, float32_t>(buffer, tensor->data_c(), num);
} }
} }
allocator->UnmapBuffer(buffer); if (allocator->UnmapBuffer(buffer) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
buffer_weights_.push_back(buffer); buffer_weights_.push_back(buffer);
} }
} }
@ -247,7 +260,7 @@ void FusionEltwiseOpenCLKernel::SetGlobalLocal() {
AlignGlobalLocal(global_size_, local_size_); AlignGlobalLocal(global_size_, local_size_);
} }
void FusionEltwiseOpenCLKernel::SetConstArgs() { int FusionEltwiseOpenCLKernel::SetConstArgs() {
auto output = GpuTensorInfo(out_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front());
cl_int4 output_shape = {static_cast<cl_int>(output.N), static_cast<cl_int>(output.H), static_cast<cl_int>(output.W), cl_int4 output_shape = {static_cast<cl_int>(output.N), static_cast<cl_int>(output.H), static_cast<cl_int>(output.W),
static_cast<cl_int>(output.C)}; static_cast<cl_int>(output.C)};
@ -260,18 +273,32 @@ void FusionEltwiseOpenCLKernel::SetConstArgs() {
if (IsScalar(in_tensor->shape())) { if (IsScalar(in_tensor->shape())) {
if (ocl_runtime_->GetFp16Enable()) { if (ocl_runtime_->GetFp16Enable()) {
auto value = static_cast<float16_t>(scalar_weights_[scalar_idx++]); auto value = static_cast<float16_t>(scalar_weights_[scalar_idx++]);
ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast<cl_half *>(&value))); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast<cl_half *>(&value))) != CL_SUCCESS) {
} else { MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]); return RET_ERROR;
} }
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
} else {
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF) !=
CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
} }
arg_idx++; // for act input arg_idx++; // for act input
} }
arg_idx++; // for output arg_idx++; // for output
ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int FusionEltwiseOpenCLKernel::Run() { int FusionEltwiseOpenCLKernel::Run() {
@ -279,12 +306,21 @@ int FusionEltwiseOpenCLKernel::Run() {
int arg_idx = 0; int arg_idx = 0;
for (auto *in_tensor : in_tensors_) { for (auto *in_tensor : in_tensors_) {
if (!in_tensor->IsConst()) { if (!in_tensor->IsConst()) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
arg_idx++; arg_idx++;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -162,7 +162,7 @@ class FusionEltwiseOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int InitWeights() override; int InitWeights() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
void SetConstArgs() override; int SetConstArgs() override;
int Run() override; int Run() override;
void ClearParameter() { op_parameter_ = nullptr; } void ClearParameter() { op_parameter_ = nullptr; }

View File

@ -81,7 +81,7 @@ int GatherOpenCLKernel::CheckSpecs() {
} }
} }
void GatherOpenCLKernel::SetConstArgs() { int GatherOpenCLKernel::SetConstArgs() {
auto input = GpuTensorInfo(in_tensors_.front()); auto input = GpuTensorInfo(in_tensors_.front());
auto output = GpuTensorInfo(out_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front());
int indices_num = in_tensors_.at(1)->ElementsNum(); int indices_num = in_tensors_.at(1)->ElementsNum();
@ -90,10 +90,23 @@ void GatherOpenCLKernel::SetConstArgs() {
cl_int4 dst_size = {static_cast<cl_int>(output.W), static_cast<cl_int>(output.H), static_cast<cl_int>(output.Slice), cl_int4 dst_size = {static_cast<cl_int>(output.W), static_cast<cl_int>(output.H), static_cast<cl_int>(output.Slice),
static_cast<cl_int>(output.N)}; static_cast<cl_int>(output.N)};
int arg_cnt = 3; int arg_cnt = 3;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void GatherOpenCLKernel::SetGlobalLocal() { void GatherOpenCLKernel::SetGlobalLocal() {
@ -104,11 +117,11 @@ void GatherOpenCLKernel::SetGlobalLocal() {
} }
int GatherOpenCLKernel::Prepare() { int GatherOpenCLKernel::Prepare() {
std::string kernel_name = "gather"; const std::string kernel_name = "gather";
if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) { if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) {
axis_ = 3; axis_ = 3;
} }
std::string program_name = "gather"; const std::string program_name = "gather";
if (!ocl_runtime_->LoadSource(program_name, gather_source)) { if (!ocl_runtime_->LoadSource(program_name, gather_source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -127,7 +140,10 @@ int GatherOpenCLKernel::Prepare() {
} }
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -135,11 +151,21 @@ int GatherOpenCLKernel::Prepare() {
int GatherOpenCLKernel::ConvertTensorToweight() { int GatherOpenCLKernel::ConvertTensorToweight() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
auto indices_tensor = in_tensors_.at(1); auto indices_tensor = in_tensors_.at(1);
allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true); if (allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
auto indices_num = indices_tensor->ElementsNum(); auto indices_num = indices_tensor->ElementsNum();
indices_data_ = indices_data_ =
reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF)); reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF));
allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); if (indices_data_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
if (allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
if (indices_data_ == nullptr) { if (indices_data_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed"; MS_LOG(ERROR) << "Memory allocation failed";
return RET_ERROR; return RET_ERROR;
@ -155,8 +181,14 @@ int GatherOpenCLKernel::ConvertTensorToweight() {
<< " But Your type is :" << data_type; << " But Your type is :" << data_type;
return RET_ERROR; return RET_ERROR;
} }
allocator->UnmapBuffer(indices_data_); if (allocator->UnmapBuffer(indices_data_) != RET_OK) {
allocator->UnmapBuffer(indices_tensor->data_c()); MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
if (allocator->UnmapBuffer(indices_tensor->data_c()) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -197,7 +229,10 @@ int GatherOpenCLKernel::PreProcess() {
if (!InferShapeDone()) { if (!InferShapeDone()) {
auto indices_tensor = in_tensors_[1]; auto indices_tensor = in_tensors_[1];
if (!indices_tensor->IsConst()) { if (!indices_tensor->IsConst()) {
ocl_runtime_->SyncCommandQueue(); if (!ocl_runtime_->SyncCommandQueue()) {
MS_LOG(ERROR) << "SyncCommandQueue failed.";
return RET_ERROR;
}
indices_tensor->MutableData(); indices_tensor->MutableData();
} }
} }
@ -209,10 +244,22 @@ int GatherOpenCLKernel::Run() {
if (intensor1_is_tensor) { if (intensor1_is_tensor) {
ConvertTensorToweight(); ConvertTensorToweight();
} }
ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); return RET_ERROR;
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); }
if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -34,7 +34,7 @@ class GatherOpenCLKernel : public OpenCLKernel {
int PreProcess() override; int PreProcess() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override { return lite::RET_OK; } int Tune() override { return lite::RET_OK; }
int ConvertTensorToweight(); int ConvertTensorToweight();

View File

@ -98,6 +98,10 @@ int ArithmeticInt8OpenCLKernel::InitWeights() {
size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT; size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT;
ImageSize img_size{in_shape.width, in_shape.height, dtype}; ImageSize img_size{in_shape.width, in_shape.height, dtype};
auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
if (weight_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
weight_ptrs_.push_back(weight_ptr_); weight_ptrs_.push_back(weight_ptr_);
} else { } else {
weight_ptrs_.push_back(nullptr); weight_ptrs_.push_back(nullptr);
@ -106,7 +110,7 @@ int ArithmeticInt8OpenCLKernel::InitWeights() {
return RET_OK; return RET_OK;
} }
void ArithmeticInt8OpenCLKernel::SetConstArgs() { int ArithmeticInt8OpenCLKernel::SetConstArgs() {
int arg_idx = 3; int arg_idx = 3;
if (!element_flag_) { if (!element_flag_) {
cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W), cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W),
@ -121,16 +125,37 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() {
} else if (in0_shape_.C != 1 && in1_shape_.C == 1) { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) {
broadcastC_flag = 2; // BroadCast C4 in input1 broadcastC_flag = 2; // BroadCast C4 in input1
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])}; cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_);
// set quantization parameter. // set quantization parameter.
auto input0_quant_param = in_tensors_[0]->quant_params().front(); auto input0_quant_param = in_tensors_[0]->quant_params().front();
@ -141,8 +166,15 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() {
cl_char4 zero_point = {static_cast<int8_t>(input0_quant_param.zeroPoint), cl_char4 zero_point = {static_cast<int8_t>(input0_quant_param.zeroPoint),
static_cast<int8_t>(input1_quant_param.zeroPoint), static_cast<int8_t>(input1_quant_param.zeroPoint),
static_cast<int8_t>(output_quant_param.zeroPoint), 0}; static_cast<int8_t>(output_quant_param.zeroPoint), 0};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); // scale if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point); // zero_point MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // scale
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // zero_point
return RET_OK;
} }
int ArithmeticInt8OpenCLKernel::Prepare() { int ArithmeticInt8OpenCLKernel::Prepare() {
@ -191,7 +223,7 @@ int ArithmeticInt8OpenCLKernel::Prepare() {
activation_max_ = 6.f; activation_max_ = 6.f;
} }
std::string program_name = "Arithmetic"; const std::string program_name = "Arithmetic";
std::string source = arithmetic_source; std::string source = arithmetic_source;
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -207,7 +239,10 @@ int ArithmeticInt8OpenCLKernel::Prepare() {
if (type() != PrimitiveType_BiasAdd) { if (type() != PrimitiveType_BiasAdd) {
InitWeights(); InitWeights();
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; MS_LOG(DEBUG) << kernel_name_ << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -218,10 +253,22 @@ int ArithmeticInt8OpenCLKernel::Run() {
auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1]; auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1];
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); return RET_ERROR;
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -33,7 +33,7 @@ class ArithmeticInt8OpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -67,15 +67,31 @@ void LayerNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
local->push_back(z); local->push_back(z);
} }
void LayerNormOpenCLKernel::SetConstArgs() { int LayerNormOpenCLKernel::SetConstArgs() {
int arg_cn = 6; int arg_cn = 6;
GpuTensorInfo img_info(in_tensors_.at(0)); GpuTensorInfo img_info(in_tensors_.at(0));
in_shape_.s[0] = img_info.N, in_shape_.s[1] = img_info.H, in_shape_.s[2] = img_info.W, in_shape_.s[3] = img_info.C; in_shape_.s[0] = img_info.N, in_shape_.s[1] = img_info.H, in_shape_.s[2] = img_info.W, in_shape_.s[3] = img_info.C;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_); }
ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void AlignMeanVarGlobalLocal(const std::vector<int> &global, const std::vector<int> &local, cl::NDRange *global_range, void AlignMeanVarGlobalLocal(const std::vector<int> &global, const std::vector<int> &local, cl::NDRange *global_range,
@ -106,9 +122,23 @@ int LayerNormOpenCLKernel::Initweight() {
size_t weight_size = img_info.Image2DSize; size_t weight_size = img_info.Image2DSize;
// allocated memory for weight and init value // allocated memory for weight and init value
gamma_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); gamma_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
if (gamma_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
beta_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); beta_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true); if (beta_ == nullptr) {
allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true); MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
if (allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
if (allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(gamma_, 0x01, weight_size); memset(gamma_, 0x01, weight_size);
memset(beta_, 0x00, weight_size); memset(beta_, 0x00, weight_size);
@ -143,8 +173,14 @@ int LayerNormOpenCLKernel::Initweight() {
memcpy(beta_, in_tensors_.at(2)->data_c(), weight_size); memcpy(beta_, in_tensors_.at(2)->data_c(), weight_size);
} }
} }
allocator->UnmapBuffer(gamma_); if (allocator->UnmapBuffer(gamma_) != RET_OK) {
allocator->UnmapBuffer(beta_); MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
if (allocator->UnmapBuffer(beta_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -164,11 +200,19 @@ int LayerNormOpenCLKernel::Prepare() {
size_t size_dtype = use_fp16_enable_ ? sizeof(float16_t) : sizeof(float); size_t size_dtype = use_fp16_enable_ ? sizeof(float16_t) : sizeof(float);
mean_size *= size_dtype; mean_size *= size_dtype;
mean_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF); mean_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF);
if (mean_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
var_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF); var_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF);
std::string kernel_name = "LayerNormalization_NHWC4"; if (var_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
const std::string kernel_name = "LayerNormalization_NHWC4";
std::string kernel_name_mean_var = "ComputeMeanVar"; std::string kernel_name_mean_var = "ComputeMeanVar";
std::string source = layer_norm_source; std::string source = layer_norm_source;
std::string program_name = "LayerNormalization"; const std::string program_name = "LayerNormalization";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -182,7 +226,10 @@ int LayerNormOpenCLKernel::Prepare() {
kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4"; kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4";
ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext); ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext);
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
@ -191,21 +238,48 @@ int LayerNormOpenCLKernel::Prepare() {
int LayerNormOpenCLKernel::Run() { int LayerNormOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
int arg1_cn = 0; int arg1_cn = 0;
ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()); // input tensor if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF); // mean_ MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF); // var_ return RET_OK; return RET_ERROR;
} // input tensor
if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
ocl_runtime_->RunKernel(kernel_mean_var_, global_mean_var_, local_mean_var_, nullptr, &event_); ocl_runtime_->RunKernel(kernel_mean_var_, global_mean_var_, local_mean_var_, nullptr, &event_);
int arg_cn = 0; int arg_cn = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean_ return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF); // var_ } // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF); // gamma_ if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF); // beta_ MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // out tensor
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // mean_
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // var_
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // gamma_
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} // beta_
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
return RET_OK; return RET_OK;
} } // namespace mindspore::kernel
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>) REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>) REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>)

View File

@ -31,7 +31,7 @@ class LayerNormOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -84,7 +84,7 @@ int MatMulOpenCLKernel::Prepare() {
std::map<int, std::string> dims2str = {{2, "_2d"}, {3, "_4d"}, {4, "_4d"}}; std::map<int, std::string> dims2str = {{2, "_2d"}, {3, "_4d"}, {4, "_4d"}};
kernel_name += dims2str[dims]; kernel_name += dims2str[dims];
std::string source = matmul_source; std::string source = matmul_source;
std::string program_name = "MatMul"; const std::string program_name = "MatMul";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -95,13 +95,16 @@ int MatMulOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) { int MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
int a = weight_shape_4d[0]; int a = weight_shape_4d[0];
int b = weight_shape_4d[1]; int b = weight_shape_4d[1];
@ -109,7 +112,15 @@ void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int
int co4 = UP_DIV(co, C4NUM); int co4 = UP_DIV(co, C4NUM);
size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
padWeight_ = allocator->Malloc(a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF); padWeight_ = allocator->Malloc(a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size); memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size);
@ -157,6 +168,7 @@ void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int
} }
} }
} }
return RET_OK;
} }
int MatMulOpenCLKernel::InitWeights() { int MatMulOpenCLKernel::InitWeights() {
@ -185,7 +197,10 @@ int MatMulOpenCLKernel::InitWeights() {
PadWeight(weight_shape_4d, ci, CO_); PadWeight(weight_shape_4d, ci, CO_);
allocator->UnmapBuffer(padWeight_); if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_weight_); FreeStoredData(stored_weight_);
return InitBias(); return InitBias();
} }
@ -204,7 +219,15 @@ int MatMulOpenCLKernel::InitBias() {
} }
lite::opencl::ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; lite::opencl::ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
bias_ = allocator->Malloc(img_size); bias_ = allocator->Malloc(img_size);
if (bias_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
if (bias_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(bias_, 0x00, co4 * C4NUM * dtype_size); memset(bias_, 0x00, co4 * C4NUM * dtype_size);
if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@ -220,7 +243,10 @@ int MatMulOpenCLKernel::InitBias() {
memcpy(bias_, src_data, CO_ * dtype_size); memcpy(bias_, src_data, CO_ * dtype_size);
} }
} }
allocator->UnmapBuffer(bias_); if (allocator->UnmapBuffer(bias_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
FreeStoredData(stored_bias_); FreeStoredData(stored_bias_);
return RET_OK; return RET_OK;
} }
@ -235,29 +261,54 @@ void MatMulOpenCLKernel::SetGlobalLocal() {
AlignGlobalLocal(global_size_, local_size_); AlignGlobalLocal(global_size_, local_size_);
} }
void MatMulOpenCLKernel::SetConstArgs() { int MatMulOpenCLKernel::SetConstArgs() {
int arg_count = 2; int arg_count = 2;
cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
if (act_weight_) { if (act_weight_) {
arg_count++; arg_count++;
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); }
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int MatMulOpenCLKernel::Run() { int MatMulOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_count = 0; int arg_count = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
if (act_weight_) { return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (act_weight_) {
if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
} }
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class MatMulOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override { return lite::RET_OK; } int Tune() override { return lite::RET_OK; }
int InitBias(); int InitBias();
@ -54,7 +54,7 @@ class MatMulOpenCLKernel : public OpenCLKernel {
std::vector<int> outShape{std::vector<int>(MAX_DIMS, 1)}; std::vector<int> outShape{std::vector<int>(MAX_DIMS, 1)};
private: private:
void PadWeight(std::vector<int> weight_shape_4d, int ci, int co); int PadWeight(std::vector<int> weight_shape_4d, int ci, int co);
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -48,7 +48,7 @@ int OneHotOpenCLKernel::Prepare() {
kernel_name += "Axis" + std::to_string(axis_); kernel_name += "Axis" + std::to_string(axis_);
} }
std::string source = one_hot_source; std::string source = one_hot_source;
std::string program_name = "OneHot"; const std::string program_name = "OneHot";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -65,7 +65,10 @@ int OneHotOpenCLKernel::Prepare() {
return ret; return ret;
} }
InitWeights(); InitWeights();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
@ -87,18 +90,40 @@ int OneHotOpenCLKernel::InitWeights() {
return RET_OK; return RET_OK;
} }
void OneHotOpenCLKernel::SetConstArgs() { int OneHotOpenCLKernel::SetConstArgs() {
cl_int2 cl_in_image2d_shape = {static_cast<cl_int>(in_shape_.width), static_cast<cl_int>(in_shape_.height)}; cl_int2 cl_in_image2d_shape = {static_cast<cl_int>(in_shape_.width), static_cast<cl_int>(in_shape_.height)};
cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H), cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H),
static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)}; static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)};
int arg_idx = 2; int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_); }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C)); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_)); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void OneHotOpenCLKernel::SetGlobalLocal() { void OneHotOpenCLKernel::SetGlobalLocal() {
local_size_ = {}; local_size_ = {};
@ -108,9 +133,18 @@ void OneHotOpenCLKernel::SetGlobalLocal() {
int OneHotOpenCLKernel::Run() { int OneHotOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -33,7 +33,7 @@ class OneHotOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int InitWeights() override; int InitWeights() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -81,11 +81,14 @@ int PadOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
void PadOpenCLKernel::SetConstArgs() { int PadOpenCLKernel::SetConstArgs() {
auto input = GpuTensorInfo(in_tensors_.front()); auto input = GpuTensorInfo(in_tensors_.front());
auto output = GpuTensorInfo(out_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front());
cl_int4 input_shape = {static_cast<cl_int>(input.N), static_cast<cl_int>(input.H), static_cast<cl_int>(input.W), cl_int4 input_shape = {static_cast<cl_int>(input.N), static_cast<cl_int>(input.H), static_cast<cl_int>(input.W),
@ -105,20 +108,45 @@ void PadOpenCLKernel::SetConstArgs() {
Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0); Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0);
int arg_cn = 2; int arg_cn = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before); }
ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
local_size_ = {8, 4, 1}; local_size_ = {8, 4, 1};
global_size_ = {output.N * output.H, output.W, output.Slice}; global_size_ = {output.N * output.H, output.W, output.Slice};
AlignGlobalLocal(global_size_, local_size_); AlignGlobalLocal(global_size_, local_size_);
return RET_OK;
} }
int PadOpenCLKernel::Run() { int PadOpenCLKernel::Run() {
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -35,7 +35,7 @@ class PadOpenCLKernel : public OpenCLKernel {
int CheckSpecs() override; int CheckSpecs() override;
int Prepare() override; int Prepare() override;
void SetConstArgs() override; int SetConstArgs() override;
int Run() override; int Run() override;

View File

@ -73,7 +73,7 @@ int PoolingOpenCLKernel::Prepare() {
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
kernel_name += "_IMG"; kernel_name += "_IMG";
std::string source = pooling2d_source; std::string source = pooling2d_source;
std::string program_name = "Pooling2d"; const std::string program_name = "Pooling2d";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -84,7 +84,10 @@ int PoolingOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
@ -100,7 +103,7 @@ void PoolingOpenCLKernel::SetGlobalLocal() {
AlignGlobalLocal(global_size_, local_size_); AlignGlobalLocal(global_size_, local_size_);
} }
void PoolingOpenCLKernel::SetConstArgs() { int PoolingOpenCLKernel::SetConstArgs() {
int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
cl_int4 input_shape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], slices}; cl_int4 input_shape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], slices};
cl_int4 output_shape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2], cl_int4 output_shape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2],
@ -109,19 +112,44 @@ void PoolingOpenCLKernel::SetConstArgs() {
cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
int arg_idx = 2; int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size); }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int PoolingOpenCLKernel::Run() { int PoolingOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class PoolingOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -63,15 +63,21 @@ void PowerGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l
local->push_back(z); local->push_back(z);
} }
void PowerOpenCLKernel::SetConstArgs() { int PowerOpenCLKernel::SetConstArgs() {
float unalign_w = static_cast<float>(out_shape_.s[3]); float unalign_w = static_cast<float>(out_shape_.s[3]);
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
int arg_cn = 2; int arg_cn = 2;
if (!broadcast_) { if (!broadcast_) {
arg_cn++; arg_cn++;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
if (use_fp16_enable_) { if (use_fp16_enable_) {
auto x = static_cast<float16_t>(power_); auto x = static_cast<float16_t>(power_);
@ -80,12 +86,19 @@ void PowerOpenCLKernel::SetConstArgs() {
auto w = static_cast<float16_t>(unalign_w); auto w = static_cast<float16_t>(unalign_w);
cl_half4 parameter = {*(reinterpret_cast<uint16_t *>(&x)), *(reinterpret_cast<uint16_t *>(&y)), cl_half4 parameter = {*(reinterpret_cast<uint16_t *>(&x)), *(reinterpret_cast<uint16_t *>(&y)),
*(reinterpret_cast<uint16_t *>(&z)), *(reinterpret_cast<uint16_t *>(&w))}; *(reinterpret_cast<uint16_t *>(&z)), *(reinterpret_cast<uint16_t *>(&w))};
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
cl_float4 parameter = {power_, shift_, scale_, unalign_w}; cl_float4 parameter = {power_, shift_, scale_, unalign_w};
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
} }
return RET_OK;
}
void PowerOpenCLKernel::SetGlobalLocal() { void PowerOpenCLKernel::SetGlobalLocal() {
cl_int4 output_shape = {}; cl_int4 output_shape = {};
@ -111,7 +124,7 @@ int PowerOpenCLKernel::Prepare() {
auto param = reinterpret_cast<PowerParameter *>(this->op_parameter_); auto param = reinterpret_cast<PowerParameter *>(this->op_parameter_);
std::string kernel_name = "power"; std::string kernel_name = "power";
std::string source = power_source; std::string source = power_source;
std::string program_name = "power"; const std::string program_name = "power";
if (broadcast_) { if (broadcast_) {
power_ = param->power_; power_ = param->power_;
kernel_name += "_broadcast"; kernel_name += "_broadcast";
@ -130,7 +143,10 @@ int PowerOpenCLKernel::Prepare() {
} }
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -138,13 +154,28 @@ int PowerOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
int arg_cn = 0; int arg_cn = 0;
if (broadcast_) { if (broadcast_) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
} else { MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()); }
} else {
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c());
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_);
return RET_OK; return RET_OK;
} }

View File

@ -30,7 +30,7 @@ class PowerOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -46,7 +46,14 @@ int PReluOpenCLKernel::InitWeights() {
auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float); auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float);
size_t weight_size = UP_ROUND(C_, C4NUM) * sizeof_FLT; size_t weight_size = UP_ROUND(C_, C4NUM) * sizeof_FLT;
weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); if (weight_vector_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(weight_vector_, 0x00, weight_size); memset(weight_vector_, 0x00, weight_size);
if (weight_tensor->data_type() == kNumberTypeFloat16) { if (weight_tensor->data_type() == kNumberTypeFloat16) {
if (enable_fp16_) { if (enable_fp16_) {
@ -69,7 +76,10 @@ int PReluOpenCLKernel::InitWeights() {
memcpy(weight_vector_, weight_tensor->data_c(), C_ * sizeof_FLT); memcpy(weight_vector_, weight_tensor->data_c(), C_ * sizeof_FLT);
} }
} }
allocator->UnmapBuffer(weight_vector_); if (allocator->UnmapBuffer(weight_vector_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
} }
return RET_OK; return RET_OK;
} }
@ -95,11 +105,18 @@ int PReluOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void PReluOpenCLKernel::SetConstArgs() { int PReluOpenCLKernel::SetConstArgs() {
int arg_idx = 3; int arg_idx = 3;
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void PReluOpenCLKernel::SetGlobalLocal() { void PReluOpenCLKernel::SetGlobalLocal() {
@ -126,8 +143,8 @@ int PReluOpenCLKernel::Prepare() {
weight_is_scalar = param->channelShared; weight_is_scalar = param->channelShared;
enable_fp16_ = ocl_runtime_->GetFp16Enable(); enable_fp16_ = ocl_runtime_->GetFp16Enable();
std::string source = prelu_source; std::string source = prelu_source;
std::string program_name = "PRelu"; const std::string program_name = "PRelu";
std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); const std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector");
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -141,7 +158,10 @@ int PReluOpenCLKernel::Prepare() {
InitWeights(); InitWeights();
MS_LOG(DEBUG) << program_name << " init Done!"; MS_LOG(DEBUG) << program_name << " init Done!";
MS_LOG(DEBUG) << "kernel_name=: " << kernel_name << " init Done!"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name << " init Done!";
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
} }
@ -149,12 +169,24 @@ int PReluOpenCLKernel::Prepare() {
int PReluOpenCLKernel::Run() { int PReluOpenCLKernel::Run() {
MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (weight_is_scalar) { if (weight_is_scalar) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
if (ret != mindspore::lite::RET_OK) { if (ret != mindspore::lite::RET_OK) {

View File

@ -31,7 +31,7 @@ class PReluOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;
int InitWeights() override; int InitWeights() override;

View File

@ -17,6 +17,7 @@
#include <set> #include <set>
#include <string> #include <string>
#include <map> #include <map>
#include <algorithm>
#include "include/errorcode.h" #include "include/errorcode.h"
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/kernel/reduce.h" #include "src/runtime/kernel/opencl/kernel/reduce.h"
@ -179,7 +180,7 @@ int ReduceOpenCLKernel::Prepare() {
} }
kernel_name += GetReduceTypeStr(reduce_param->mode_); kernel_name += GetReduceTypeStr(reduce_param->mode_);
std::string source = reduce_source; std::string source = reduce_source;
std::string program_name = "Reduce"; const std::string program_name = "Reduce";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -190,22 +191,32 @@ int ReduceOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
void ReduceOpenCLKernel::SetConstArgs() { int ReduceOpenCLKernel::SetConstArgs() {
int h = inShape.H; int h = inShape.H;
int w = inShape.W; int w = inShape.W;
int c = inShape.C; int c = inShape.C;
int c4 = UP_DIV(c, C4NUM); int c4 = UP_DIV(c, C4NUM);
cl_int4 size = {h, w, c4, c}; cl_int4 size = {h, w, c4, c};
int arg_idx = 2; int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) {
if (wc_reduce_ || c_reduce_) { MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()); return RET_ERROR;
} }
if (wc_reduce_ || c_reduce_) {
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
return RET_OK;
} }
void ReduceOpenCLKernel::SetGlobalLocal() { void ReduceOpenCLKernel::SetGlobalLocal() {
int h = inShape.H; int h = inShape.H;
@ -235,9 +246,18 @@ int ReduceOpenCLKernel::Tune() {
int ReduceOpenCLKernel::Run() { int ReduceOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class ReduceOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override; int Tune() override;

View File

@ -53,15 +53,22 @@ int ReshapeOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void ReshapeOpenCLKernel::SetConstArgs() { int ReshapeOpenCLKernel::SetConstArgs() {
auto in = GpuTensorInfo(in_tensors_.front()); auto in = GpuTensorInfo(in_tensors_.front());
auto out = GpuTensorInfo(out_tensors_.front()); auto out = GpuTensorInfo(out_tensors_.front());
cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)}; cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)};
cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)}; cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)};
int arg_idx = 2; int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void ReshapeOpenCLKernel::SetGlobalLocal() { void ReshapeOpenCLKernel::SetGlobalLocal() {
@ -72,9 +79,9 @@ void ReshapeOpenCLKernel::SetGlobalLocal() {
} }
int ReshapeOpenCLKernel::Prepare() { int ReshapeOpenCLKernel::Prepare() {
std::string kernel_name = "reshape_NHWC4"; const std::string kernel_name = "reshape_NHWC4";
std::string source = reshape_source; std::string source = reshape_source;
std::string program_name = "reshape"; const std::string program_name = "reshape";
auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -87,16 +94,28 @@ int ReshapeOpenCLKernel::Prepare() {
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
int ReshapeOpenCLKernel::Run() { int ReshapeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -104,7 +123,10 @@ int ReshapeOpenCLKernel::PreProcess() {
if (type() == PrimitiveType_Reshape && !InferShapeDone()) { if (type() == PrimitiveType_Reshape && !InferShapeDone()) {
auto shape_tensor = in_tensors_[1]; auto shape_tensor = in_tensors_[1];
if (!shape_tensor->IsConst()) { if (!shape_tensor->IsConst()) {
ocl_runtime_->SyncCommandQueue(); if (!ocl_runtime_->SyncCommandQueue()) {
MS_LOG(ERROR) << "SyncCommandQueue failed.";
return RET_ERROR;
}
shape_tensor->MutableData(); shape_tensor->MutableData();
} }
} }

View File

@ -30,7 +30,7 @@ class ReshapeOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int PreProcess() override; int PreProcess() override;
}; };

View File

@ -64,7 +64,7 @@ int ResizeOpenCLKernel::Prepare() {
} }
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
std::string source = resize_source; std::string source = resize_source;
std::string program_name = "Resize"; const std::string program_name = "Resize";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -75,7 +75,10 @@ int ResizeOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
@ -87,7 +90,7 @@ float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size)
: static_cast<float>(input_size) / static_cast<float>(output_size); : static_cast<float>(input_size) / static_cast<float>(output_size);
} }
void ResizeOpenCLKernel::SetConstArgs() { int ResizeOpenCLKernel::SetConstArgs() {
auto in_shape = in_tensors_[0]->shape(); auto in_shape = in_tensors_[0]->shape();
auto out_shape = out_tensors_[0]->shape(); auto out_shape = out_tensors_[0]->shape();
int n = out_shape[0]; int n = out_shape[0];
@ -101,9 +104,19 @@ void ResizeOpenCLKernel::SetConstArgs() {
cl_int4 out_size = {n, h, w, c4}; cl_int4 out_size = {n, h, w, c4};
cl_float2 scale = {scale_h, scale_w}; cl_float2 scale = {scale_h, scale_w};
int arg_idx = 2; int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void ResizeOpenCLKernel::SetGlobalLocal() { void ResizeOpenCLKernel::SetGlobalLocal() {
@ -116,9 +129,18 @@ void ResizeOpenCLKernel::SetGlobalLocal() {
int ResizeOpenCLKernel::Run() { int ResizeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -126,7 +148,10 @@ int ResizeOpenCLKernel::PreProcess() {
if (type() == PrimitiveType_Resize && !InferShapeDone() && in_tensors_.size() == INPUT_TENSOR_SIZE_2) { if (type() == PrimitiveType_Resize && !InferShapeDone() && in_tensors_.size() == INPUT_TENSOR_SIZE_2) {
auto shape_tensor = in_tensors_[1]; auto shape_tensor = in_tensors_[1];
if (!shape_tensor->IsConst()) { if (!shape_tensor->IsConst()) {
ocl_runtime_->SyncCommandQueue(); if (!ocl_runtime_->SyncCommandQueue()) {
MS_LOG(ERROR) << "SyncCommandQueue failed.";
return RET_ERROR;
}
shape_tensor->MutableData(); shape_tensor->MutableData();
} }
} }

View File

@ -31,7 +31,7 @@ class ResizeOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int PreProcess() override; int PreProcess() override;

View File

@ -98,14 +98,30 @@ int ScaleOpenCLKernel::InitWeights() {
img_size.height = 1; img_size.height = 1;
img_size.width = UP_DIV(scale_tensor->shape()[0], C4NUM); img_size.width = UP_DIV(scale_tensor->shape()[0], C4NUM);
scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c());
if (scale_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c());
if (offset_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
if (in_tensor->format() == scale_tensor->format()) { if (in_tensor->format() == scale_tensor->format()) {
if (in_tensor->data_type() == scale_tensor->data_type()) { if (in_tensor->data_type() == scale_tensor->data_type()) {
scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c());
if (scale_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c());
if (offset_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
} else { } else {
MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to "
<< in_tensor->data_type(); << in_tensor->data_type();
@ -121,7 +137,15 @@ int ScaleOpenCLKernel::InitWeights() {
PackNHWCToNHWC4(scale_tensor->data_c(), scale.data(), src_is_fp16, fp16_enable, image2d_info); PackNHWCToNHWC4(scale_tensor->data_c(), scale.data(), src_is_fp16, fp16_enable, image2d_info);
PackNHWCToNHWC4(offset_tensor->data_c(), offset.data(), src_is_fp16, fp16_enable, image2d_info); PackNHWCToNHWC4(offset_tensor->data_c(), offset.data(), src_is_fp16, fp16_enable, image2d_info);
scale_ptr_ = allocator->Malloc(img_size, scale.data()); scale_ptr_ = allocator->Malloc(img_size, scale.data());
if (scale_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
offset_ptr_ = allocator->Malloc(img_size, offset.data()); offset_ptr_ = allocator->Malloc(img_size, offset.data());
if (offset_ptr_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
} else { } else {
MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to "
<< in_tensor->data_type(); << in_tensor->data_type();
@ -175,7 +199,7 @@ int ScaleOpenCLKernel::Prepare() {
} else { } else {
kernel_name += "_BUF"; kernel_name += "_BUF";
} }
std::string program_name = "Scale"; const std::string program_name = "Scale";
std::string source = GetActDefines() + scale_source; std::string source = GetActDefines() + scale_source;
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -193,44 +217,86 @@ int ScaleOpenCLKernel::Prepare() {
return RET_OK; return RET_OK;
} }
int ScaleOpenCLKernel::Run() { int ScaleOpenCLKernel::SetKernelArg(int *idx) {
MS_LOG(DEBUG) << this->name() << " Running!";
auto *param = reinterpret_cast<const ScaleParameter *>(op_parameter_);
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
return RET_ERROR;
}
if (weight_vector_flag_) { if (weight_vector_flag_) {
void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_;
void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_; void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) {
return RET_ERROR;
}
} else { } else {
if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
float scale = static_cast<float *>(in_tensors_[1]->data_c())[0]; float scale = static_cast<float *>(in_tensors_[1]->data_c())[0];
float offset = static_cast<float *>(in_tensors_[2]->data_c())[0]; float offset = static_cast<float *>(in_tensors_[2]->data_c())[0];
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) {
return RET_ERROR;
}
} else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) {
float16_t scale = static_cast<float16_t *>(in_tensors_[1]->data_c())[0]; float16_t scale = static_cast<float16_t *>(in_tensors_[1]->data_c())[0];
float16_t offset = static_cast<float16_t *>(in_tensors_[2]->data_c())[0]; float16_t offset = static_cast<float16_t *>(in_tensors_[2]->data_c())[0];
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(scale)); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(scale)) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(offset)); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(offset)) != CL_SUCCESS) {
return RET_ERROR;
}
} else { } else {
MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[1]->data_type(); MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[1]->data_type();
return RET_ERROR; return RET_ERROR;
} }
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
return RET_ERROR;
}
cl_int2 output_shape{static_cast<int>(global_size_[0]), static_cast<int>(global_size_[1])}; cl_int2 output_shape{static_cast<int>(global_size_[0]), static_cast<int>(global_size_[1])};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
return RET_ERROR;
}
*idx = arg_idx;
return RET_OK;
}
int ScaleOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto *param = reinterpret_cast<const ScaleParameter *>(op_parameter_);
int arg_idx = 0;
if (SetKernelArg(&arg_idx) != RET_OK) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (weight_vector_flag_ && broadcast_flag_) { if (weight_vector_flag_ && broadcast_flag_) {
if (broadcast_H_flag_) { if (broadcast_H_flag_) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
} }
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_); }
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -34,7 +34,7 @@ class ScaleOpenCLKernel : public OpenCLKernel {
private: private:
void Image2dGetWorkGroupSize(); void Image2dGetWorkGroupSize();
int SetKernelArg(int *idx);
bool weight_vector_flag_{true}; bool weight_vector_flag_{true};
bool broadcast_flag_{false}; bool broadcast_flag_{false};
bool broadcast_H_flag_{false}; bool broadcast_H_flag_{false};

View File

@ -75,7 +75,7 @@ int SoftmaxOpenCLKernel::Prepare() {
kernel_name += "Axis" + std::to_string(axis_); kernel_name += "Axis" + std::to_string(axis_);
} }
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
std::string program_name = "Softmax"; const std::string program_name = "Softmax";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -93,7 +93,10 @@ int SoftmaxOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return lite::RET_OK; return lite::RET_OK;
@ -131,24 +134,40 @@ int SoftmaxOpenCLKernel::Tune() {
return OpenCLKernel::Tune(); return OpenCLKernel::Tune();
} }
void SoftmaxOpenCLKernel::SetConstArgs() { int SoftmaxOpenCLKernel::SetConstArgs() {
int arg_idx = 2; int arg_idx = 2;
int channel = out_shape_.C; int channel = out_shape_.C;
int c4 = out_shape_.Slice; int c4 = out_shape_.Slice;
auto mask_ = GetMaskForLastChannel(channel); auto mask_ = GetMaskForLastChannel(channel);
cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
cl_int4 input_shape = {static_cast<int>(out_shape_.N), static_cast<int>(out_shape_.H), static_cast<int>(out_shape_.W), cl_int4 input_shape = {static_cast<int>(out_shape_.N), static_cast<int>(out_shape_.H), static_cast<int>(out_shape_.W),
c4}; c4};
ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
int SoftmaxOpenCLKernel::Run() { int SoftmaxOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return lite::RET_OK; return lite::RET_OK;
} }

View File

@ -30,7 +30,7 @@ class SoftmaxOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Tune() override; int Tune() override;

View File

@ -61,7 +61,7 @@ int SpaceToBatchNDOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void SpaceToBatchNDOpenCLKernel::SetConstArgs() { int SpaceToBatchNDOpenCLKernel::SetConstArgs() {
auto param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_); auto param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
@ -71,10 +71,23 @@ void SpaceToBatchNDOpenCLKernel::SetConstArgs() {
cl_int4 paddings = {param->paddings_[0], param->paddings_[1], param->paddings_[2], param->paddings_[3]}; cl_int4 paddings = {param->paddings_[0], param->paddings_[1], param->paddings_[2], param->paddings_[3]};
int arg_cnt = 2; int arg_cnt = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); }
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() {
@ -87,9 +100,9 @@ void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() {
} }
int SpaceToBatchNDOpenCLKernel::Prepare() { int SpaceToBatchNDOpenCLKernel::Prepare() {
std::string kernel_name = "space_to_batch_nd_NHWC4"; const std::string kernel_name = "space_to_batch_nd_NHWC4";
std::string source = space_to_batch_nd_source; std::string source = space_to_batch_nd_source;
std::string program_name = "space_to_batch_nd"; const std::string program_name = "space_to_batch_nd";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -101,7 +114,10 @@ int SpaceToBatchNDOpenCLKernel::Prepare() {
return ret; return ret;
} }
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -109,9 +125,18 @@ int SpaceToBatchNDOpenCLKernel::Prepare() {
int SpaceToBatchNDOpenCLKernel::Run() { int SpaceToBatchNDOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class SpaceToBatchNDOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -51,7 +51,7 @@ int SpaceToDepthOpenCLKernel::Prepare() {
kernel_name += "Align"; kernel_name += "Align";
} }
std::string source = space_to_depth_source; std::string source = space_to_depth_source;
std::string program_name = "SpaceToDepth"; const std::string program_name = "SpaceToDepth";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -63,29 +63,48 @@ int SpaceToDepthOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
void SpaceToDepthOpenCLKernel::SetConstArgs() { int SpaceToDepthOpenCLKernel::SetConstArgs() {
cl_int4 cl_in_shape = {static_cast<cl_int>(in_shape_.N), static_cast<cl_int>(in_shape_.H), cl_int4 cl_in_shape = {static_cast<cl_int>(in_shape_.N), static_cast<cl_int>(in_shape_.H),
static_cast<cl_int>(in_shape_.W), static_cast<cl_int>(in_shape_.Slice)}; static_cast<cl_int>(in_shape_.W), static_cast<cl_int>(in_shape_.Slice)};
cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H), cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H),
static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)}; static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)};
auto param = reinterpret_cast<SpaceToDepthParameter *>(op_parameter_); auto param = reinterpret_cast<SpaceToDepthParameter *>(op_parameter_);
int arg_idx = 2; int arg_idx = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (type() == PrimitiveType_DepthToSpace) { if (type() == PrimitiveType_DepthToSpace) {
int co_size = out_shape_.C; int co_size = out_shape_.C;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
int ci_size = in_shape_.C; int ci_size = in_shape_.C;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
} }
return RET_OK;
}
void SpaceToDepthOpenCLKernel::SetGlobalLocal() { void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
local_size_ = {}; local_size_ = {};
global_size_ = {out_shape_.Slice, out_shape_.W, out_shape_.H * out_shape_.N}; global_size_ = {out_shape_.Slice, out_shape_.W, out_shape_.H * out_shape_.N};
@ -95,9 +114,18 @@ void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
int SpaceToDepthOpenCLKernel::Run() { int SpaceToDepthOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class SpaceToDepthOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -37,7 +37,10 @@ int SparseToDenseOpenCLKernel::InitOutputToDefault() {
cl_float4 fill_value = {}; cl_float4 fill_value = {};
fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_;
auto src_data = out_tensors_[0]->data_c(); auto src_data = out_tensors_[0]->data_c();
allocator_->GetImageSize(src_data, &img_size); if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
MS_LOG(ERROR) << "GetImageSize failed.";
return RET_ERROR;
}
auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1}; auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@ -62,7 +65,14 @@ int SparseToDenseOpenCLKernel::InitWeights() {
auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float); auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float);
size_t weight_size = UP_ROUND(size, C4NUM) * sizeof_FLT; size_t weight_size = UP_ROUND(size, C4NUM) * sizeof_FLT;
weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); if (weight_vector_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memset(weight_vector_, 0x00, weight_size); memset(weight_vector_, 0x00, weight_size);
if (weight_tensor->data_type() == kNumberTypeFloat16) { if (weight_tensor->data_type() == kNumberTypeFloat16) {
if (enable_fp16_) { if (enable_fp16_) {
@ -85,7 +95,10 @@ int SparseToDenseOpenCLKernel::InitWeights() {
memcpy(weight_vector_, weight_tensor->data_c(), size * sizeof_FLT); memcpy(weight_vector_, weight_tensor->data_c(), size * sizeof_FLT);
} }
} }
allocator->UnmapBuffer(weight_vector_); if (allocator->UnmapBuffer(weight_vector_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
} }
return RET_OK; return RET_OK;
} }
@ -115,7 +128,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void SparseToDenseOpenCLKernel::SetConstArgs() { int SparseToDenseOpenCLKernel::SetConstArgs() {
auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
GpuTensorInfo img_info(out_tensors_[0]); GpuTensorInfo img_info(out_tensors_[0]);
size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float);
@ -124,11 +137,27 @@ void SparseToDenseOpenCLKernel::SetConstArgs() {
auto out_shape_temp = out_tensors_[0]->shape(); auto out_shape_temp = out_tensors_[0]->shape();
cl_int4 out_shape = {out_n_, out_h_, out_w_, UP_DIV(out_c_, C4NUM)}; cl_int4 out_shape = {out_n_, out_h_, out_w_, UP_DIV(out_c_, C4NUM)};
int arg_cn = 3; int arg_cn = 3;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void SparseToDenseOpenCLKernel::SetGlobalLocal() { void SparseToDenseOpenCLKernel::SetGlobalLocal() {
@ -144,9 +173,9 @@ int SparseToDenseOpenCLKernel::Prepare() {
input_dim_ = in_tensors_[0]->shape().size(); input_dim_ = in_tensors_[0]->shape().size();
inshapeindex1_dim = in_tensors_[0]->shape()[1]; inshapeindex1_dim = in_tensors_[0]->shape()[1];
weight_scalar_ = in_tensors_[2]->IsScalar(); weight_scalar_ = in_tensors_[2]->IsScalar();
std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector"); const std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector");
std::string source = sparse_to_dense_source; std::string source = sparse_to_dense_source;
std::string program_name = "SparseToDense"; const std::string program_name = "SparseToDense";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -174,7 +203,10 @@ int SparseToDenseOpenCLKernel::Prepare() {
InitWeights(); InitWeights();
InferShapeTo4D(); InferShapeTo4D();
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -212,14 +244,30 @@ int SparseToDenseOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
InitOutputToDefault(); InitOutputToDefault();
int arg_cn = 0; int arg_cn = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
if (!weight_scalar_) { return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF); }
} else { if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) !=
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_); CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (!weight_scalar_) {
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else {
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
} }
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
return RET_OK; return RET_OK;
} }

View File

@ -31,7 +31,7 @@ class SparseToDenseOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int Run() override; int Run() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int CheckSpecs() override; int CheckSpecs() override;

View File

@ -41,7 +41,10 @@ int SplitOpenCLKernel::RunAxis0() {
for (int i = 0; i < out_tensors_.size(); i++) { for (int i = 0; i < out_tensors_.size(); i++) {
auto dst_data = out_tensors_[i]->data_c(); auto dst_data = out_tensors_[i]->data_c();
ImageSize img_size; ImageSize img_size;
allocator_->GetImageSize(dst_data, &img_size); if (allocator_->GetImageSize(dst_data, &img_size) != RET_OK) {
MS_LOG(ERROR) << "GetImageSize failed.";
return RET_ERROR;
}
auto dst_area = cl::array<cl::size_type, 3U>{0, 0, 0}; auto dst_area = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1}; auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data)); cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data));
@ -93,23 +96,32 @@ int SplitOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape) { int SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape) {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
int shape_dim = in_shape.at(param->split_dim_); int shape_dim = in_shape.at(param->split_dim_);
if (num_split_ == 1) { if (num_split_ == 1) {
size_t num_split = UP_DIV(shape_dim, param->split_sizes_[0]); size_t num_split = UP_DIV(shape_dim, param->split_sizes_[0]);
split_sizes_ = reinterpret_cast<int *>(allocator->Malloc(num_split * sizeof(int), lite::opencl::MemType::BUF)); split_sizes_ = reinterpret_cast<int *>(allocator->Malloc(num_split * sizeof(int), lite::opencl::MemType::BUF));
if (split_sizes_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
for (int i = 0; i < num_split - 1; ++i) { for (int i = 0; i < num_split - 1; ++i) {
split_sizes_[i] = (i + 1) * param->split_sizes_[0]; split_sizes_[i] = (i + 1) * param->split_sizes_[0];
} }
} else { } else {
int sum = 0; int sum = 0;
split_sizes_ = reinterpret_cast<int *>(allocator->Malloc(num_split_ * sizeof(int), lite::opencl::MemType::BUF)); split_sizes_ = reinterpret_cast<int *>(allocator->Malloc(num_split_ * sizeof(int), lite::opencl::MemType::BUF));
if (split_sizes_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
for (int i = 0; i < num_split_ - 1; ++i) { for (int i = 0; i < num_split_ - 1; ++i) {
sum += param->split_sizes_[i]; sum += param->split_sizes_[i];
split_sizes_[i] = sum; split_sizes_[i] = sum;
} }
} }
return RET_OK;
} }
int SplitOpenCLKernel::Prepare() { int SplitOpenCLKernel::Prepare() {
@ -129,7 +141,10 @@ int SplitOpenCLKernel::Prepare() {
} }
} }
} }
AlignSplitSizes(param, in_shape); if (AlignSplitSizes(param, in_shape) != RET_OK) {
MS_LOG(ERROR) << "AlignSplitSizes failed.";
return RET_ERROR;
}
std::string kernel_name = "split_out"; std::string kernel_name = "split_out";
kernel_name += std::to_string(num_split_); kernel_name += std::to_string(num_split_);
kernel_name += "_axis" + std::to_string(split_dim_); kernel_name += "_axis" + std::to_string(split_dim_);
@ -138,7 +153,7 @@ int SplitOpenCLKernel::Prepare() {
} }
MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
std::string source = split_source; std::string source = split_source;
std::string program_name = "split"; const std::string program_name = "split";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -151,12 +166,15 @@ int SplitOpenCLKernel::Prepare() {
return ret; return ret;
} }
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
} }
void SplitOpenCLKernel::SetConstArgs() { int SplitOpenCLKernel::SetConstArgs() {
int arg_cn = out_tensors_.size() + 2; int arg_cn = out_tensors_.size() + 2;
cl_int4 shape = {}; cl_int4 shape = {};
for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) {
@ -166,7 +184,10 @@ void SplitOpenCLKernel::SetConstArgs() {
if (Align_) { if (Align_) {
in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
for (int i = 0; i < out_tensors_.size(); ++i) { for (int i = 0; i < out_tensors_.size(); ++i) {
cl_int4 temp = {}; cl_int4 temp = {};
@ -177,13 +198,21 @@ void SplitOpenCLKernel::SetConstArgs() {
if (Align_) { if (Align_) {
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
}
if (!Align_) {
GpuTensorInfo img_info(in_tensors_.at(0)); GpuTensorInfo img_info(in_tensors_.at(0));
size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float);
stride_w = img_info.RowPitch() / dtype; stride_w = img_info.RowPitch() / dtype;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
return; MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
return RET_OK;
} }
void SplitOpenCLKernel::SetGlobalLocal() { void SplitOpenCLKernel::SetGlobalLocal() {
@ -205,15 +234,31 @@ int SplitOpenCLKernel::Run() {
} }
int arg_cn = 0; int arg_cn = 0;
if (Align_) { if (Align_) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF) !=
CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
for (int i = 0; i < out_tensors_.size(); ++i) { for (int i = 0; i < out_tensors_.size(); ++i) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF);
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
return RET_OK; return RET_OK;
} }

View File

@ -31,12 +31,12 @@ class SplitOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;
private: private:
void AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape); int AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape);
int RunAxis0(); int RunAxis0();
private: private:

View File

@ -36,7 +36,10 @@ int StackOpenCLKernel::RunAxis0() {
cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data)); cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data));
for (int i = 0; i < in_tensors_.size(); i++) { for (int i = 0; i < in_tensors_.size(); i++) {
auto src_data = in_tensors_[i]->data_c(); auto src_data = in_tensors_[i]->data_c();
allocator_->GetImageSize(src_data, &img_size); if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
MS_LOG(ERROR) << "GetImageSize failed.";
return RET_ERROR;
}
auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1}; auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@ -95,7 +98,7 @@ int StackOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void StackOpenCLKernel::SetConstArgs() { int StackOpenCLKernel::SetConstArgs() {
int arg_cn = in_tensors_.size() + 1; int arg_cn = in_tensors_.size() + 1;
cl_int4 inshape_tmp = {}, outshape_tmp = {}; cl_int4 inshape_tmp = {}, outshape_tmp = {};
for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) {
@ -108,8 +111,14 @@ void StackOpenCLKernel::SetConstArgs() {
Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1); Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1);
in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (buffer_button_) { if (buffer_button_) {
GpuTensorInfo img_info_out(out_tensors_[0]); GpuTensorInfo img_info_out(out_tensors_[0]);
GpuTensorInfo img_info_in(in_tensors_[0]); GpuTensorInfo img_info_in(in_tensors_[0]);
@ -117,9 +126,13 @@ void StackOpenCLKernel::SetConstArgs() {
stride_w_out = img_info_out.RowPitch() / dtype; stride_w_out = img_info_out.RowPitch() / dtype;
stride_w_in = img_info_in.RowPitch() / dtype; stride_w_in = img_info_in.RowPitch() / dtype;
cl_int2 stride_w = {stride_w_out, stride_w_in}; cl_int2 stride_w = {stride_w_out, stride_w_in};
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
} }
return RET_OK;
}
void StackOpenCLKernel::SetGlobalLocal() { void StackOpenCLKernel::SetGlobalLocal() {
if (((in_tensors_[0]->shape().size() == DIMENSION_2D || in_tensors_[0]->shape().size() == DIMENSION_3D) && if (((in_tensors_[0]->shape().size() == DIMENSION_2D || in_tensors_[0]->shape().size() == DIMENSION_3D) &&
@ -162,7 +175,7 @@ int StackOpenCLKernel::Prepare() {
MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
std::string source = stack_source; std::string source = stack_source;
std::string program_name = "stack"; const std::string program_name = "stack";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -174,7 +187,10 @@ int StackOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
@ -188,16 +204,33 @@ int StackOpenCLKernel::Run() {
int arg_cn = 0; int arg_cn = 0;
if (buffer_button_) { if (buffer_button_) {
for (int i = 0; i < in_tensors_.size(); ++i) { for (int i = 0; i < in_tensors_.size(); ++i) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF) !=
CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) !=
CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
} else { } else {
for (int i = 0; i < in_tensors_.size(); ++i) { for (int i = 0; i < in_tensors_.size(); ++i) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
} }
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator<StackOpenCLKernel>); REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator<StackOpenCLKernel>);

View File

@ -29,7 +29,7 @@ class StackOpenCLKernel : public OpenCLKernel {
~StackOpenCLKernel() override{}; ~StackOpenCLKernel() override{};
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -27,9 +27,9 @@ using mindspore::lite::opencl::ImageSize;
namespace mindspore::kernel { namespace mindspore::kernel {
int StrassenOpenCLKernel::Prepare() { int StrassenOpenCLKernel::Prepare() {
std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; const std::string kernel_name = "MatMul_Strassen_NHWC4_2d";
std::string source = strassen_source; std::string source = strassen_source;
std::string program_name = "MatMul"; const std::string program_name = "MatMul";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -50,13 +50,16 @@ int StrassenOpenCLKernel::Prepare() {
if (ret != RET_OK) { if (ret != RET_OK) {
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { int StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
ImageSize img_size{static_cast<size_t>(UP_DIV(NumA, C4NUM)), static_cast<size_t>(NumA), img_dtype}; ImageSize img_size{static_cast<size_t>(UP_DIV(NumA, C4NUM)), static_cast<size_t>(NumA), img_dtype};
@ -64,15 +67,52 @@ void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) {
size_t memB = NumB * NumB * dtype_size; size_t memB = NumB * NumB * dtype_size;
for (int depth = 0; depth < MAXDEPTH; depth++) { for (int depth = 0; depth < MAXDEPTH; depth++) {
B_temp[depth] = allocator->Malloc(memB, lite::opencl::MemType::BUF); B_temp[depth] = allocator->Malloc(memB, lite::opencl::MemType::BUF);
A_temp[depth] = allocator->Malloc(img_size); if (B_temp[depth] == nullptr) {
M1[depth] = allocator->Malloc(img_size); MS_LOG(ERROR) << "Malloc failed.";
M2[depth] = allocator->Malloc(img_size); return RET_ERROR;
M3[depth] = allocator->Malloc(img_size);
M4[depth] = allocator->Malloc(img_size);
M5[depth] = allocator->Malloc(img_size);
M6[depth] = allocator->Malloc(img_size);
M7[depth] = allocator->Malloc(img_size);
} }
A_temp[depth] = allocator->Malloc(img_size);
if (A_temp[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M1[depth] = allocator->Malloc(img_size);
if (M1[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M2[depth] = allocator->Malloc(img_size);
if (M2[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M3[depth] = allocator->Malloc(img_size);
if (M3[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M4[depth] = allocator->Malloc(img_size);
if (M4[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M5[depth] = allocator->Malloc(img_size);
if (M5[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M6[depth] = allocator->Malloc(img_size);
if (M6[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
M7[depth] = allocator->Malloc(img_size);
if (M7[depth] == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
}
return RET_OK;
} }
int StrassenOpenCLKernel::InitWeights() { int StrassenOpenCLKernel::InitWeights() {
@ -82,14 +122,25 @@ int StrassenOpenCLKernel::InitWeights() {
int NumB = in_tensors_[1]->shape()[0]; int NumB = in_tensors_[1]->shape()[0];
size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
padWeight_ = allocator->Malloc(NumA * NumB * dtype_size, lite::opencl::MemType::BUF); padWeight_ = allocator->Malloc(NumA * NumB * dtype_size, lite::opencl::MemType::BUF);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
if (padWeight_ == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
memset(padWeight_, 0x00, NumA * NumB * dtype_size); memset(padWeight_, 0x00, NumA * NumB * dtype_size);
auto originWeightFp32 = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()); auto originWeightFp32 = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c());
auto originWeightFp16 = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->data_c()); auto originWeightFp16 = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->data_c());
bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16;
AllocatorMemoryForStrassen(NumA / 2, NumB / 2); if (AllocatorMemoryForStrassen(NumA / 2, NumB / 2) != RET_OK) {
MS_LOG(ERROR) << "AllocatorMemoryForStrassen failed.";
return RET_ERROR;
}
size_t size = NumA * NumB * dtype_size; size_t size = NumA * NumB * dtype_size;
if (isModelFp16) { if (isModelFp16) {
if (enable_fp16_) { if (enable_fp16_) {
@ -108,7 +159,10 @@ int StrassenOpenCLKernel::InitWeights() {
memcpy(padWeightFp32, originWeightFp32, size); memcpy(padWeightFp32, originWeightFp32, size);
} }
} }
allocator->UnmapBuffer(padWeight_); if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
@ -120,7 +174,7 @@ void AlignStrassenGlobalLocal(const std::vector<size_t> &global, const std::vect
} }
// 0 : global_size_, 1: global_size_add_sub // 0 : global_size_, 1: global_size_add_sub
void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { int StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) {
size_t strassen_size_C4 = UP_DIV(strassen_size, C4NUM); size_t strassen_size_C4 = UP_DIV(strassen_size, C4NUM);
local_size_add_sub = {16, 1, 16}; local_size_add_sub = {16, 1, 16};
if (type_flag == 0) { if (type_flag == 0) {
@ -130,6 +184,7 @@ void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type
global_size_add_sub = {strassen_size_C4, 1, strassen_size}; global_size_add_sub = {strassen_size_C4, 1, strassen_size};
AlignStrassenGlobalLocal(global_size_add_sub, local_size_add_sub, &global_add_sub_, &local_add_sub_); AlignStrassenGlobalLocal(global_size_add_sub, local_size_add_sub, &global_add_sub_, &local_add_sub_);
} }
return RET_OK;
} }
void StrassenOpenCLKernel::SetGlobalLocal() { void StrassenOpenCLKernel::SetGlobalLocal() {
@ -142,7 +197,7 @@ void StrassenOpenCLKernel::SetGlobalLocal() {
StrassenSetGlobalLocal(strassen_size, 2); // set global_size_weights StrassenSetGlobalLocal(strassen_size, 2); // set global_size_weights
} }
void StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, int StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size,
bool is_matmul_kernel) { bool is_matmul_kernel) {
cl_int4 shape; cl_int4 shape;
if (is_matmul_kernel) { if (is_matmul_kernel) {
@ -150,95 +205,172 @@ void StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, i
} else { } else {
shape = {strassen_size, 1, 1, UP_DIV(strassen_size, C4NUM)}; shape = {strassen_size, 1, 1, UP_DIV(strassen_size, C4NUM)};
} }
ocl_runtime_->SetKernelArg(*kernel, index, shape); if (ocl_runtime_->SetKernelArg(*kernel, index, shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void StrassenOpenCLKernel::SetConstArgs() { int StrassenOpenCLKernel::SetConstArgs() {
int arg_count = 2;
cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
cl_int4 shape_offset = {0, 0, 0, 0};
int strassen_size = inShape[3] / 2; int strassen_size = inShape[3] / 2;
out_shape.s[2] = in_shape.s[2] = in_shape.s[2] / 2;
out_shape.s[3] = in_shape.s[3] = in_shape.s[3] / 2;
StrassenSetConstArgs(&kernel_IMG_add_sub_2, 3, strassen_size, false); StrassenSetConstArgs(&kernel_IMG_add_sub_2, 3, strassen_size, false);
StrassenSetConstArgs(&kernel_BUF_add_sub_2, 2, strassen_size, false); StrassenSetConstArgs(&kernel_BUF_add_sub_2, 2, strassen_size, false);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); return RET_OK;
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, shape_offset);
} }
void StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, int StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size,
cl_int2 offset, lite::opencl::MemType mem_type) { cl_int2 offset, lite::opencl::MemType mem_type) {
if (input == nullptr || output == nullptr) { if (input == nullptr || output == nullptr) {
MS_LOG(ERROR) << "StrassenDataFilled input or output can not nullptr"; MS_LOG(ERROR) << "StrassenDataFilled input or output can not nullptr";
return; return RET_ERROR;
} }
if (mem_type == lite::opencl::MemType::IMG) { if (mem_type == lite::opencl::MemType::IMG) {
ocl_runtime_->SetKernelArg(*kernel, 0, input); if (ocl_runtime_->SetKernelArg(*kernel, 0, input) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(*kernel, 1, output); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 1, output) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
StrassenSetConstArgs(kernel, 2, size, false); StrassenSetConstArgs(kernel, 2, size, false);
ocl_runtime_->SetKernelArg(*kernel, 3, offset); if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) {
ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK;
} }
void StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset,
int flag, lite::opencl::MemType mem_type) { int flag, lite::opencl::MemType mem_type) {
if (input == nullptr || output == nullptr) { if (input == nullptr || output == nullptr) {
MS_LOG(ERROR) << "StrassenAddSub input or output can not nullptr"; MS_LOG(ERROR) << "StrassenAddSub input or output can not nullptr";
return; return RET_ERROR;
} }
if (mem_type == lite::opencl::MemType::IMG) { if (mem_type == lite::opencl::MemType::IMG) {
ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG); if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} else { } else {
ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
} }
StrassenSetConstArgs(kernel, 2, size, false); StrassenSetConstArgs(kernel, 2, size, false);
ocl_runtime_->SetKernelArg(*kernel, 3, offset); if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(*kernel, 4, flag); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 4, flag) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK;
} }
void StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, int StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4,
void *input4, void *input5, void *input6, void *input7, void *output, void *input5, void *input6, void *input7, void *output, const int size) {
const int size) {
if (input1 == nullptr || input2 == nullptr || input3 == nullptr || input4 == nullptr || input5 == nullptr || if (input1 == nullptr || input2 == nullptr || input3 == nullptr || input4 == nullptr || input5 == nullptr ||
input6 == nullptr || input7 == nullptr || output == nullptr) { input6 == nullptr || input7 == nullptr || output == nullptr) {
MS_LOG(ERROR) << "StrassenBackResult input or output can not nullptr"; MS_LOG(ERROR) << "StrassenBackResult input or output can not nullptr";
return; return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 0, input1) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 1, input2) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 2, input3) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 3, input4) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 4, input5) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 5, input6) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 6, input7) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(*kernel, 7, output) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(*kernel, 0, input1);
ocl_runtime_->SetKernelArg(*kernel, 1, input2);
ocl_runtime_->SetKernelArg(*kernel, 2, input3);
ocl_runtime_->SetKernelArg(*kernel, 3, input4);
ocl_runtime_->SetKernelArg(*kernel, 4, input5);
ocl_runtime_->SetKernelArg(*kernel, 5, input6);
ocl_runtime_->SetKernelArg(*kernel, 6, input7);
ocl_runtime_->SetKernelArg(*kernel, 7, output);
StrassenSetConstArgs(kernel, 8, size, false); StrassenSetConstArgs(kernel, 8, size, false);
ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK;
} }
void StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { int StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) {
if (input == nullptr || weight == nullptr || output == nullptr) { if (input == nullptr || weight == nullptr || output == nullptr) {
MS_LOG(ERROR) << "StrassenRunMmatmul input ,weight or output can not nullptr"; MS_LOG(ERROR) << "StrassenRunMmatmul input ,weight or output can not nullptr";
return; return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 0, input) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, output) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
ocl_runtime_->SetKernelArg(kernel_, 0, input);
ocl_runtime_->SetKernelArg(kernel_, 1, output);
ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF);
StrassenSetConstArgs(&kernel_, 3, size, true); StrassenSetConstArgs(&kernel_, 3, size, true);
StrassenSetConstArgs(&kernel_, 4, size, true); StrassenSetConstArgs(&kernel_, 4, size, true);
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK;
} }
void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, int StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth,
const int threshold) { const int threshold) {
const int size_2 = size / 2; const int size_2 = size / 2;
int C4 = UP_DIV(size_2, C4NUM); int C4 = UP_DIV(size_2, C4NUM);
@ -246,7 +378,7 @@ void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, co
// run matmul; // run matmul;
StrassenSetGlobalLocal(size, 0); StrassenSetGlobalLocal(size, 0);
StrassenRunMmatmul(data, weight, result, size); StrassenRunMmatmul(data, weight, result, size);
return; return RET_OK;
} }
// flag = 0 : add otherwise flag = 1 : sub // flag = 0 : add otherwise flag = 1 : sub
// M1 = A11 * ( B12- B22) // M1 = A11 * ( B12- B22)
@ -307,6 +439,7 @@ void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, co
StrassenSetGlobalLocal(size_2, 1); StrassenSetGlobalLocal(size_2, 1);
StrassenBackResult(&kernel_back_result, M1[depth + 1], M2[depth + 1], M3[depth + 1], M4[depth + 1], M5[depth + 1], StrassenBackResult(&kernel_back_result, M1[depth + 1], M2[depth + 1], M3[depth + 1], M4[depth + 1], M5[depth + 1],
M6[depth + 1], M7[depth + 1], result, size_2); M6[depth + 1], M7[depth + 1], result, size_2);
return RET_OK;
} }
int StrassenOpenCLKernel::Run() { int StrassenOpenCLKernel::Run() {

View File

@ -33,22 +33,22 @@ class StrassenOpenCLKernel : public MatMulOpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int InitWeights() override; int InitWeights() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
// strassen // strassen
private: private:
void AllocatorMemoryForStrassen(int NumA, int NumB); int AllocatorMemoryForStrassen(int NumA, int NumB);
void DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold); int DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold);
void StrassenSetGlobalLocal(size_t strassen_size, int type_flag); int StrassenSetGlobalLocal(size_t strassen_size, int type_flag);
void StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); int StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel);
void StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, int StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset,
lite::opencl::MemType mem_type); lite::opencl::MemType mem_type);
void StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, int StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag,
lite::opencl::MemType mem_type); lite::opencl::MemType mem_type);
void StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, int StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5,
void *input6, void *input7, void *output, const int size); void *input6, void *input7, void *output, const int size);
void StrassenRunMmatmul(void *input, void *weight, void *output, const int size); int StrassenRunMmatmul(void *input, void *weight, void *output, const int size);
cl::Kernel kernel_IMG_add_sub_2; cl::Kernel kernel_IMG_add_sub_2;
cl::Kernel MatMul_StrassenBUFFilled; cl::Kernel MatMul_StrassenBUFFilled;
cl::Kernel MatMul_StrassenIMGFilled; cl::Kernel MatMul_StrassenIMGFilled;

View File

@ -85,7 +85,7 @@ int StridedSliceOpenCLKernel::CheckSpecs() {
} }
int StridedSliceOpenCLKernel::Prepare() { int StridedSliceOpenCLKernel::Prepare() {
std::string program_name = "strided_slice"; const std::string program_name = "strided_slice";
if (!ocl_runtime_->LoadSource(program_name, strided_slice_source)) { if (!ocl_runtime_->LoadSource(program_name, strided_slice_source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -96,7 +96,10 @@ int StridedSliceOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
return RET_OK; return RET_OK;
} }
@ -187,14 +190,33 @@ int StridedSliceOpenCLKernel::InitConstArgs() {
return RET_OK; return RET_OK;
} }
void StridedSliceOpenCLKernel::SetConstArgs() { int StridedSliceOpenCLKernel::SetConstArgs() {
int arg_cn = 2; int arg_cn = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void StridedSliceOpenCLKernel::SetGlobalLocal() { void StridedSliceOpenCLKernel::SetGlobalLocal() {
@ -214,9 +236,18 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() {
int StridedSliceOpenCLKernel::Run() { int StridedSliceOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! "; MS_LOG(DEBUG) << this->name() << " Running! ";
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -31,7 +31,7 @@ class StridedSliceOpenCLKernel : public OpenCLKernel {
int CheckSpecs() override; int CheckSpecs() override;
int Prepare() override; int Prepare() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;

View File

@ -42,11 +42,18 @@ int ToFormatOpenCLKernel::CheckSpecs() {
return RET_OK; return RET_OK;
} }
void ToFormatOpenCLKernel::SetConstArgs() { int ToFormatOpenCLKernel::SetConstArgs() {
cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_}; cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_};
cl_int4 gsize{(cl_int)(N_ * H_), (cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1}; cl_int4 gsize{(cl_int)(N_ * H_), (cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1};
ocl_runtime_->SetKernelArg(kernel_, 2, gsize); if (ocl_runtime_->SetKernelArg(kernel_, 2, gsize) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 3, shape); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 3, shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void ToFormatOpenCLKernel::SetGlobalLocal() { void ToFormatOpenCLKernel::SetGlobalLocal() {
@ -70,7 +77,7 @@ int ToFormatOpenCLKernel::Prepare() {
kernel_name += dtype_str[in_tensor->data_type()] + "_" + dtype_str[out_tensor->data_type()]; kernel_name += dtype_str[in_tensor->data_type()] + "_" + dtype_str[out_tensor->data_type()];
this->set_name(kernel_name); this->set_name(kernel_name);
std::string program_name = "to_format"; const std::string program_name = "to_format";
std::string source = to_format_source; std::string source = to_format_source;
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
@ -89,7 +96,10 @@ int ToFormatOpenCLKernel::Prepare() {
C_ = output.C; C_ = output.C;
SetGlobalLocal(); SetGlobalLocal();
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
@ -98,9 +108,18 @@ int ToFormatOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
auto src_mem_type = (out_mem_type_ == MemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto src_mem_type = (out_mem_type_ == MemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG;
auto dst_mem_type = out_mem_type_; auto dst_mem_type = out_mem_type_;
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type); if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -35,7 +35,7 @@ class ToFormatOpenCLKernel : public OpenCLKernel {
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int InferShape() override; int InferShape() override;

View File

@ -101,7 +101,7 @@ int TransposeOpenCLKernel::Prepare() {
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
std::string source = transpose_source; std::string source = transpose_source;
std::string program_name = "transpose"; const std::string program_name = "transpose";
if (!ocl_runtime_->LoadSource(program_name, source)) { if (!ocl_runtime_->LoadSource(program_name, source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -113,33 +113,46 @@ int TransposeOpenCLKernel::Prepare() {
MS_LOG(ERROR) << "Build kernel failed."; MS_LOG(ERROR) << "Build kernel failed.";
return ret; return ret;
} }
SetConstArgs(); if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
SetGlobalLocal(); SetGlobalLocal();
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
return RET_OK; return RET_OK;
} }
void TransposeOpenCLKernel::SetConstArgs() { int TransposeOpenCLKernel::SetConstArgs() {
size_t n = tensor_size_.N; size_t n = tensor_size_.N;
size_t h = tensor_size_.H; size_t h = tensor_size_.H;
size_t w = tensor_size_.W; size_t w = tensor_size_.W;
size_t c = tensor_size_.C; size_t c = tensor_size_.C;
int arg_idx = 2; int arg_idx = 2;
cl_int4 shape = {static_cast<int>(n), static_cast<int>(h), static_cast<int>(w), static_cast<int>(c)}; cl_int4 shape = {static_cast<int>(n), static_cast<int>(h), static_cast<int>(w), static_cast<int>(c)};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (type_ == TransposeType::GENERAL) { if (type_ == TransposeType::GENERAL) {
int de_perm[4]; // output to input perm int de_perm[4]; // output to input perm
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
de_perm[perm_4d_[i]] = i; de_perm[perm_4d_[i]] = i;
} }
cl_int4 de_perm_cl = {de_perm[0], de_perm[1], de_perm[2], de_perm[3]}; cl_int4 de_perm_cl = {de_perm[0], de_perm[1], de_perm[2], de_perm[3]};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
GpuTensorInfo in_shape = GpuTensorInfo(in_tensors_[0]); GpuTensorInfo in_shape = GpuTensorInfo(in_tensors_[0]);
cl_int4 in_shape_int4 = {static_cast<cl_int>(in_shape.N), static_cast<cl_int>(in_shape.H), cl_int4 in_shape_int4 = {static_cast<cl_int>(in_shape.N), static_cast<cl_int>(in_shape.H),
static_cast<cl_int>(in_shape.W), static_cast<cl_int>(in_shape.C)}; static_cast<cl_int>(in_shape.W), static_cast<cl_int>(in_shape.C)};
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
} }
} }
return RET_OK;
}
void TransposeOpenCLKernel::SetGlobalLocal() { void TransposeOpenCLKernel::SetGlobalLocal() {
size_t n = tensor_size_.N; size_t n = tensor_size_.N;
@ -161,9 +174,18 @@ void TransposeOpenCLKernel::SetGlobalLocal() {
int TransposeOpenCLKernel::Run() { int TransposeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0; int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -33,7 +33,7 @@ class TransposeOpenCLKernel : public OpenCLKernel {
int Run() override; int Run() override;
int Prepare() override; int Prepare() override;
int CheckSpecs() override; int CheckSpecs() override;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
private: private:

View File

@ -78,7 +78,7 @@ std::vector<float> GenerateWinogradFilter(void *src, TypeId dtype, size_t CO, si
} // namespace } // namespace
int WinogradOpenCLKernel::BuildKernel() { int WinogradOpenCLKernel::BuildKernel() {
std::string program_name = "winograd"; const std::string program_name = "winograd";
if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source)) { if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source)) {
MS_LOG(ERROR) << "Load source failed."; MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR; return RET_ERROR;
@ -103,7 +103,7 @@ int WinogradOpenCLKernel::BuildKernel() {
return RET_OK; return RET_OK;
} }
void WinogradOpenCLKernel::InitFilter() { int WinogradOpenCLKernel::InitFilter() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
// allocate opencl memory: buffer or image2d // allocate opencl memory: buffer or image2d
@ -115,9 +115,17 @@ void WinogradOpenCLKernel::InitFilter() {
size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
size = width * height * CO_TILE * sizeof_FLT_; size = width * height * CO_TILE * sizeof_FLT_;
packed_filter_ = allocator->Malloc({width, height, dtype}); packed_filter_ = allocator->Malloc({width, height, dtype});
if (packed_filter_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
} else { } else {
size = UP_DIV(CO_SLICES_, Ogroup) * 6 * 6 * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; size = UP_DIV(CO_SLICES_, Ogroup) * 6 * 6 * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_;
packed_filter_ = allocator->Malloc(size, MemType::BUF); packed_filter_ = allocator->Malloc(size, MemType::BUF);
if (packed_filter_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
} }
// rearrange filter // rearrange filter
@ -128,6 +136,10 @@ void WinogradOpenCLKernel::InitFilter() {
void *src_data = winograd_filter.data(); void *src_data = winograd_filter.data();
#else #else
auto winograd_filter = std::make_unique<float[]>(CO_ * 6 * 6 * CI_); auto winograd_filter = std::make_unique<float[]>(CO_ * 6 * 6 * CI_);
if (winograd_filter == nullptr) {
MS_LOG(ERROR) << "new winograd_filter failed.";
return RET_ERROR;
}
WinogradWeightTransform(reinterpret_cast<const float *>(src_filter_data), WinogradWeightTransform(reinterpret_cast<const float *>(src_filter_data),
reinterpret_cast<float *>(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false); reinterpret_cast<float *>(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false);
@ -147,53 +159,121 @@ void WinogradOpenCLKernel::InitFilter() {
if (filter_type_ == MemType::IMG) { if (filter_type_ == MemType::IMG) {
ocl_runtime_->WriteImage(packed_filter_, tmp.data()); ocl_runtime_->WriteImage(packed_filter_, tmp.data());
} else { } else {
allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
return RET_ERROR;
}
memcpy(packed_filter_, tmp.data(), size); memcpy(packed_filter_, tmp.data(), size);
allocator->UnmapBuffer(packed_filter_); if (allocator->UnmapBuffer(packed_filter_) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
return RET_ERROR;
}
} }
FreeStoredData(stored_filter_); FreeStoredData(stored_filter_);
return RET_OK;
} }
void WinogradOpenCLKernel::AllocateMemory() { int WinogradOpenCLKernel::AllocateMemory() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
size_t width = TILE_HW_; size_t width = TILE_HW_;
size_t height = CI_SLICES_ * 36; size_t height = CI_SLICES_ * 36;
winograd_mem0_ = allocator->Malloc({width, height, img_dtype}); winograd_mem0_ = allocator->Malloc({width, height, img_dtype});
if (winograd_mem0_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
width = TILE_HW_; width = TILE_HW_;
height = CO_SLICES_ * 36; height = CO_SLICES_ * 36;
winograd_mem1_ = allocator->Malloc({width, height, img_dtype}); winograd_mem1_ = allocator->Malloc({width, height, img_dtype});
if (winograd_mem1_ == nullptr) {
MS_LOG(ERROR) << "Malloc failed.";
return RET_ERROR;
}
return RET_OK;
} }
void WinogradOpenCLKernel::SetConstArgs() { int WinogradOpenCLKernel::SetConstArgs() {
AllocateMemory(); AllocateMemory();
int arg_cn = 1; int arg_cn = 1;
cl_int4 input_shape = {batch_size_, OH_, OW_, CI_SLICES_}; // maybe pad=0, so use OH/OW cl_int4 input_shape = {batch_size_, OH_, OW_, CI_SLICES_}; // maybe pad=0, so use OH/OW
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_); if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_); }
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_); if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
arg_cn = 0; arg_cn = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_); }
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_); if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
arg_cn = 2; arg_cn = 2;
cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_};
ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_); if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF); MS_LOG(ERROR) << "SetKernelArg failed.";
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape); return RET_ERROR;
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_); }
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_); if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) {
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
return RET_OK;
} }
void WinogradOpenCLKernel::SetGlobalLocal() { void WinogradOpenCLKernel::SetGlobalLocal() {
@ -205,15 +285,30 @@ void WinogradOpenCLKernel::SetGlobalLocal() {
int WinogradOpenCLKernel::Run() { int WinogradOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " winograd Running!"; MS_LOG(DEBUG) << this->name() << " winograd Running!";
MS_LOG(DEBUG) << "winograd kernel0 Running!"; MS_LOG(DEBUG) << "winograd kernel0 Running!";
ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << "winograd kernel1 Running!"; MS_LOG(DEBUG) << "winograd kernel1 Running!";
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_); if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
MS_LOG(DEBUG) << "winograd kernel2 Running!"; MS_LOG(DEBUG) << "winograd kernel2 Running!";
ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()); if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_); MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }

View File

@ -32,7 +32,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel {
~WinogradOpenCLKernel() override = default; ~WinogradOpenCLKernel() override = default;
void SetConstArgs() override; int SetConstArgs() override;
void SetGlobalLocal() override; void SetGlobalLocal() override;
int Run() override; int Run() override;
@ -42,8 +42,8 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel {
private: private:
int BuildKernel() override; int BuildKernel() override;
void InitFilter() override; int InitFilter() override;
void AllocateMemory(); int AllocateMemory();
cl::Kernel kernel_4x4to36_; cl::Kernel kernel_4x4to36_;
cl::Kernel kernel_36to4x4_; cl::Kernel kernel_36to4x4_;

View File

@ -24,7 +24,7 @@ using mindspore::lite::RET_OK;
using mindspore::lite::opencl::ImageSize; using mindspore::lite::opencl::ImageSize;
namespace mindspore::kernel { namespace mindspore::kernel {
int OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local) { void OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local) {
std::vector<size_t> internal_global_ws = global; std::vector<size_t> internal_global_ws = global;
for (size_t i = 0; i < local.size(); ++i) { for (size_t i = 0; i < local.size(); ++i) {
internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i)); internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i));
@ -50,16 +50,12 @@ int OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std:
if (!local.empty()) { if (!local.empty()) {
local_range_ = cl::NDRange(local.at(0), local.at(1)); local_range_ = cl::NDRange(local.at(0), local.at(1));
} }
} else if (global.size() == 3) { } else if (global.size() >= 3) {
global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2)); global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2));
if (!local.empty()) { if (!local.empty()) {
local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2)); local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2));
} }
} else {
MS_LOG(ERROR) << "Not supported NDRange!";
return RET_ERROR;
} }
return RET_OK;
} }
int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) { int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) {
@ -112,11 +108,17 @@ void OpenCLKernel::PrintOutput(int print_num, const std::string &out_file) {
auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
auto runtime = runtime_wrapper.GetInstance(); auto runtime = runtime_wrapper.GetInstance();
auto allocator = runtime->GetAllocator(); auto allocator = runtime->GetAllocator();
runtime->SyncCommandQueue(); if (!runtime->SyncCommandQueue()) {
MS_LOG(ERROR) << "SyncCommandQueue failed.";
}
if (mem_type == lite::opencl::MemType::BUF) { if (mem_type == lite::opencl::MemType::BUF) {
allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true); if (allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true) == nullptr) {
MS_LOG(ERROR) << "Map Buffer failed.";
}
memcpy(data.data(), tensor->data_c(), img_info.OriginSize); memcpy(data.data(), tensor->data_c(), img_info.OriginSize);
allocator->UnmapBuffer(tensor->data_c()); if (allocator->UnmapBuffer(tensor->data_c()) != RET_OK) {
MS_LOG(ERROR) << "UnmapBuffer failed.";
}
} else { } else {
runtime->ReadImage(tensor->data_c(), data.data()); runtime->ReadImage(tensor->data_c(), data.data());
} }

View File

@ -185,7 +185,7 @@ class OpenCLKernel : public InnerKernel {
ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); ocl_runtime_ = ocl_runtime_wrap_.GetInstance();
} }
~OpenCLKernel() override = default; ~OpenCLKernel() override = default;
int AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local); void AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local);
int Prepare() override { return RET_OK; } int Prepare() override { return RET_OK; }
int PreProcess() override; int PreProcess() override;
@ -194,7 +194,7 @@ class OpenCLKernel : public InnerKernel {
virtual int CheckSpecs(); virtual int CheckSpecs();
virtual int InitWeights() { return RET_OK; } virtual int InitWeights() { return RET_OK; }
virtual void SetConstArgs() {} virtual int SetConstArgs() { return RET_OK; }
virtual void SetGlobalLocal() {} virtual void SetGlobalLocal() {}
virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; } virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; }
virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) { virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) {

View File

@ -420,6 +420,7 @@ int OpenCLSubGraph::Execute() {
return ret; return ret;
} }
if (!ocl_runtime_->SyncCommandQueue()) { if (!ocl_runtime_->SyncCommandQueue()) {
MS_LOG(ERROR) << "SyncCommandQueue failed.";
return RET_ERROR; return RET_ERROR;
} }
return RET_OK; return RET_OK;
@ -449,6 +450,7 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &
return ret; return ret;
} }
if (!ocl_runtime_->SyncCommandQueue()) { if (!ocl_runtime_->SyncCommandQueue()) {
MS_LOG(ERROR) << "SyncCommandQueue failed.";
return RET_ERROR; return RET_ERROR;
} }
return RET_OK; return RET_OK;