From fe438fae9cd453ba3e94a4355a8e9f88c5f3869e Mon Sep 17 00:00:00 2001 From: gongdaguo Date: Tue, 3 Aug 2021 16:20:09 +0800 Subject: [PATCH] fix security check --- .../runtime/gpu/opencl/opencl_allocator.cc | 5 + .../src/runtime/gpu/opencl/opencl_runtime.cc | 35 ++- .../src/runtime/gpu/opencl/opencl_runtime.h | 2 +- .../kernel/opencl/kernel/activation.cc | 42 ++- .../runtime/kernel/opencl/kernel/activation.h | 2 +- .../runtime/kernel/opencl/kernel/argminmax.cc | 71 ++++- .../runtime/kernel/opencl/kernel/argminmax.h | 2 +- .../kernel/opencl/kernel/arithmetic.cc | 69 ++++- .../runtime/kernel/opencl/kernel/arithmetic.h | 2 +- .../kernel/opencl/kernel/arithmetic_self.cc | 22 +- .../kernel/opencl/kernel/arithmetic_self.h | 8 +- .../kernel/opencl/kernel/batch_to_space_nd.cc | 47 +++- .../kernel/opencl/kernel/batch_to_space_nd.h | 2 +- .../runtime/kernel/opencl/kernel/batchnorm.cc | 130 +++++++-- .../runtime/kernel/opencl/kernel/batchnorm.h | 4 +- .../src/runtime/kernel/opencl/kernel/cast.cc | 32 ++- .../src/runtime/kernel/opencl/kernel/cast.h | 2 +- .../runtime/kernel/opencl/kernel/concat.cc | 60 ++++- .../src/runtime/kernel/opencl/kernel/concat.h | 2 +- .../runtime/kernel/opencl/kernel/conv2d.cc | 114 ++++++-- .../src/runtime/kernel/opencl/kernel/conv2d.h | 6 +- .../kernel/opencl/kernel/conv2d_transpose.cc | 93 +++++-- .../kernel/opencl/kernel/conv2d_transpose.h | 2 +- .../kernel/opencl/kernel/depthwise_conv2d.cc | 79 ++++-- .../kernel/opencl/kernel/depthwise_conv2d.h | 2 +- .../src/runtime/kernel/opencl/kernel/fill.cc | 7 +- .../src/runtime/kernel/opencl/kernel/fill.h | 2 +- .../kernel/opencl/kernel/fullconnection.cc | 93 +++++-- .../kernel/opencl/kernel/fullconnection.h | 2 +- .../kernel/opencl/kernel/fusion_eltwise.cc | 62 ++++- .../kernel/opencl/kernel/fusion_eltwise.h | 2 +- .../runtime/kernel/opencl/kernel/gather.cc | 81 ++++-- .../src/runtime/kernel/opencl/kernel/gather.h | 2 +- .../opencl/kernel/int8/arithmetic_int8.cc | 79 ++++-- 
.../opencl/kernel/int8/arithmetic_int8.h | 2 +- .../kernel/opencl/kernel/layer_norm.cc | 120 +++++++-- .../runtime/kernel/opencl/kernel/layer_norm.h | 2 +- .../runtime/kernel/opencl/kernel/matmul.cc | 81 ++++-- .../src/runtime/kernel/opencl/kernel/matmul.h | 4 +- .../runtime/kernel/opencl/kernel/one_hot.cc | 60 ++++- .../runtime/kernel/opencl/kernel/one_hot.h | 2 +- .../src/runtime/kernel/opencl/kernel/pad.cc | 48 +++- .../src/runtime/kernel/opencl/kernel/pad.h | 2 +- .../runtime/kernel/opencl/kernel/pooling2d.cc | 50 +++- .../runtime/kernel/opencl/kernel/pooling2d.h | 2 +- .../src/runtime/kernel/opencl/kernel/power.cc | 55 +++- .../src/runtime/kernel/opencl/kernel/power.h | 2 +- .../src/runtime/kernel/opencl/kernel/prelu.cc | 56 +++- .../src/runtime/kernel/opencl/kernel/prelu.h | 2 +- .../runtime/kernel/opencl/kernel/reduce.cc | 38 ++- .../src/runtime/kernel/opencl/kernel/reduce.h | 2 +- .../runtime/kernel/opencl/kernel/reshape.cc | 42 ++- .../runtime/kernel/opencl/kernel/reshape.h | 2 +- .../runtime/kernel/opencl/kernel/resize.cc | 45 +++- .../src/runtime/kernel/opencl/kernel/resize.h | 2 +- .../src/runtime/kernel/opencl/kernel/scale.cc | 100 +++++-- .../src/runtime/kernel/opencl/kernel/scale.h | 2 +- .../runtime/kernel/opencl/kernel/softmax.cc | 35 ++- .../runtime/kernel/opencl/kernel/softmax.h | 2 +- .../kernel/opencl/kernel/space_to_batch_nd.cc | 47 +++- .../kernel/opencl/kernel/space_to_batch_nd.h | 2 +- .../kernel/opencl/kernel/space_to_depth.cc | 50 +++- .../kernel/opencl/kernel/space_to_depth.h | 2 +- .../kernel/opencl/kernel/sparse_to_dense.cc | 86 ++++-- .../kernel/opencl/kernel/sparse_to_dense.h | 2 +- .../src/runtime/kernel/opencl/kernel/split.cc | 81 ++++-- .../src/runtime/kernel/opencl/kernel/split.h | 4 +- .../src/runtime/kernel/opencl/kernel/stack.cc | 57 +++- .../src/runtime/kernel/opencl/kernel/stack.h | 2 +- .../runtime/kernel/opencl/kernel/strassen.cc | 255 +++++++++++++----- .../runtime/kernel/opencl/kernel/strassen.h | 24 +- 
.../kernel/opencl/kernel/strided_slice.cc | 55 +++- .../kernel/opencl/kernel/strided_slice.h | 2 +- .../runtime/kernel/opencl/kernel/to_format.cc | 35 ++- .../runtime/kernel/opencl/kernel/to_format.h | 2 +- .../runtime/kernel/opencl/kernel/transpose.cc | 40 ++- .../runtime/kernel/opencl/kernel/transpose.h | 2 +- .../runtime/kernel/opencl/kernel/winograd.cc | 151 +++++++++-- .../runtime/kernel/opencl/kernel/winograd.h | 6 +- .../runtime/kernel/opencl/opencl_kernel.cc | 20 +- .../src/runtime/kernel/opencl/opencl_kernel.h | 4 +- .../runtime/kernel/opencl/opencl_subgraph.cc | 2 + 82 files changed, 2222 insertions(+), 632 deletions(-) diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc index 3507e3dcb01..dbc917a4d40 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc @@ -108,12 +108,15 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi } if (*image == nullptr) { delete *buffer; + *buffer = nullptr; MS_LOG(ERROR) << "Create OpenCL Image2D failed! 
(ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } if (ret != CL_SUCCESS) { delete *buffer; delete *image; + *buffer = nullptr; + *image = nullptr; MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } @@ -125,6 +128,8 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi if (host_ptr == nullptr) { delete *buffer; delete *image; + *buffer = nullptr; + *image = nullptr; MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; return nullptr; } diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc index c47847c5998..4bac5664132 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc @@ -210,6 +210,7 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { #endif if (context_ == nullptr || ret != CL_SUCCESS) { delete device_; + device_ = nullptr; MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -218,6 +219,8 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { if (default_command_queue_ == nullptr || ret != CL_SUCCESS) { delete device_; delete context_; + device_ = nullptr; + context_ = nullptr; MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -227,6 +230,9 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { delete device_; delete context_; delete default_command_queue_; + device_ = nullptr; + context_ = nullptr; + default_command_queue_ = nullptr; MS_LOG(ERROR) << "Profiling command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -291,6 +297,10 @@ int OpenCLRuntime::Init() { delete context_; delete default_command_queue_; delete profiling_command_queue_; + device_ = nullptr; + context_ = nullptr; + default_command_queue_ = nullptr; + 
profiling_command_queue_ = nullptr; MS_LOG(ERROR) << "Command OpenCL allocator failed!"; return RET_ERROR; } @@ -305,7 +315,9 @@ int OpenCLRuntime::Uninit() { if (init_state_ != InitSuccess) { return RET_OK; } - StoreCache(); + if (StoreCache() != RET_OK) { + MS_LOG(ERROR) << "StoreCache failed!"; + } program_map_.clear(); delete default_command_queue_; delete profiling_command_queue_; @@ -574,12 +586,15 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { - return RET_OK; + return RET_ERROR; } if (command_queue == nullptr) { command_queue = default_command_queue_; } - return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr); + if (clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr) != CL_SUCCESS) { + return RET_ERROR; + } + return RET_OK; } void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector ®ion, @@ -720,17 +735,17 @@ void OpenCLRuntime::LoadCache() { MS_LOG(INFO) << "Init opencl cache success"; } -void OpenCLRuntime::StoreCache() { +int OpenCLRuntime::StoreCache() { if (!enable_cache_) { - return; + return RET_OK; } if (!flush_cache_) { - return; + return RET_OK; } auto fbb = std::make_unique(); if (fbb == nullptr) { MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail"; - return; + return RET_ERROR; } std::vector> program_binarys; for (const auto &kv : program_map_) { @@ -753,8 +768,12 @@ void OpenCLRuntime::StoreCache() { auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data); fbb->Finish(gpu_cache); uint8_t *buf = fbb->GetBufferPointer(); - WriteToBin(cache_path_, reinterpret_cast(buf), fbb->GetSize()); + if (WriteToBin(cache_path_, reinterpret_cast(buf), fbb->GetSize()) != RET_OK) { + MS_LOG(ERROR) << "WriteToBin 
failed."; + return RET_ERROR; + } MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize(); + return RET_OK; } cl::Buffer *OpenCLRuntime::CreateSharedMemoryBuffer(size_t size, void *host_ptr) { diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h index 788be5ea97b..024b7b70456 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h @@ -203,7 +203,7 @@ class OpenCLRuntime { // for cache private: void LoadCache(); - void StoreCache(); + int StoreCache(); #ifdef MS_OPENCL_BINARY_CACHE bool enable_cache_{true}; #else diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index 0ac112b88d3..f7dab80ed41 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -65,37 +65,53 @@ int ActivationOpenCLKernel::CheckSpecs() { int ActivationOpenCLKernel::Prepare() { outShape = GpuTensorInfo(out_tensors_[0]); std::string source = activation_source; - std::string program_name = "Activation"; + const std::string program_name = "Activation"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; } - std::string kernel_name = GetActTypeString(type_); + const std::string kernel_name = GetActTypeString(type_); auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); if (ret != RET_OK) { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " init Done!"; return RET_OK; } -void ActivationOpenCLKernel::SetConstArgs() { +int 
ActivationOpenCLKernel::SetConstArgs() { int arg_idx = 2; cl_int2 image_size = {static_cast(outShape.width), static_cast(outShape.height)}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (type_ == ActivationType_LEAKY_RELU) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } if (type_ == ActivationType_SIGMOID) { int c4 = outShape.Slice; int last_c4 = outShape.C % 4 == 0 ? 4 : outShape.C % 4; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void ActivationOpenCLKernel::SetGlobalLocal() { @@ -107,8 +123,14 @@ void ActivationOpenCLKernel::SetGlobalLocal() { int ActivationOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel:" << this->name() << " 
fail."; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h index 0c47e8955a3..7031a9a8f9e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h @@ -35,7 +35,7 @@ class ActivationOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index 48e0cfe5054..8d7118776a5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include "src/kernel_registry.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/kernel/argminmax.h" @@ -58,19 +59,41 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() { return RET_OK; } -void ArgMinMaxOpenCLKernel::SetConstArgs() { +int ArgMinMaxOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(op_parameter_); cl_int4 in_shape{static_cast(im_in_.N), static_cast(im_in_.H), static_cast(im_in_.W), static_cast(im_in_.C)}; cl_int4 flags = {param->out_value_, param->get_max_, param->axis_, param->topk_}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags); + if (ocl_runtime_->SetKernelArg(kernel_, 
arg_cnt++, buff_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ArgMinMaxOpenCLKernel::SetGlobalLocal() { @@ -134,14 +157,22 @@ int ArgMinMaxOpenCLKernel::InitWeights() { auto allocator = ocl_runtime_->GetAllocator(); int dtype_size = ocl_runtime_->GetFp16Enable() ? 
sizeof(int16_t) : sizeof(float); buff_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * dtype_size, lite::opencl::MemType::BUF); + if (buff_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } ids_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * sizeof(int32_t), lite::opencl::MemType::BUF); + if (ids_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } return RET_OK; } int ArgMinMaxOpenCLKernel::Prepare() { - std::string kernel_name = "argminmax"; + const std::string kernel_name = "argminmax"; std::string source = argminmax_source; - std::string program_name = "argminmax"; + const std::string program_name = "argminmax"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -162,16 +193,28 @@ int ArgMinMaxOpenCLKernel::Prepare() { InitWeights(); SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int ArgMinMaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h index ec3b70ce256..220949e3e2c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h @@ -32,7 +32,7 @@ class ArgMinMaxOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int InitWeights() override; int Tune() override { return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 44ff1a45694..b5afadce8a3 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -98,6 +98,10 @@ int ArithmeticOpenCLKernel::InitWeights() { size_t dtype = fp16_enable ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -106,7 +110,7 @@ int ArithmeticOpenCLKernel::InitWeights() { return RET_OK; } -void ArithmeticOpenCLKernel::SetConstArgs() { +int ArithmeticOpenCLKernel::SetConstArgs() { int arg_idx = 3; if (!element_flag_) { cl_int4 in0_shape = {static_cast(in0_shape_.N), static_cast(in0_shape_.H), static_cast(in0_shape_.W), @@ -121,16 +125,38 @@ void ArithmeticOpenCLKernel::SetConstArgs() { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) { broadcastC_flag = 2; // BroadCast C4 in input1 } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { cl_int2 output_shape{static_cast(global_range_[0]), static_cast(global_range_[1])}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } - 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int ArithmeticOpenCLKernel::Prepare() { @@ -179,7 +205,7 @@ int ArithmeticOpenCLKernel::Prepare() { activation_max_ = 6.f; } - std::string program_name = "Arithmetic"; + const std::string program_name = "Arithmetic"; std::string source = arithmetic_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -196,7 +222,10 @@ int ArithmeticOpenCLKernel::Prepare() { if (type() != PrimitiveType_BiasAdd) { InitWeights(); } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; return RET_OK; } @@ -206,10 +235,22 @@ int ArithmeticOpenCLKernel::Run() { auto input_0_ptr = weight_ptrs_[0] == nullptr ? in_tensors_[0]->data_c() : weight_ptrs_[0]; auto input_1_ptr = weight_ptrs_[1] == nullptr ? 
in_tensors_[1]->data_c() : weight_ptrs_[1]; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h index ff7bfa922b1..e19386cf3b4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h @@ -35,7 +35,7 @@ class ArithmeticOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 4a30f4c33c6..dbc619ab884 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -86,7 +86,7 @@ int ArithmeticSelfOpenCLKernel::Prepare() { kernel_name += 
std::string(schema::EnumNamePrimitiveType(type())) + "_NHWC4"; } MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; - std::string program_name = "ArithmeticSelf"; + const std::string program_name = "ArithmeticSelf"; if (!ocl_runtime_->LoadSource(program_name, arithmeticself_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -98,15 +98,27 @@ int ArithmeticSelfOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } int ArithmeticSelfOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index 2419ee40783..4cd9e2ba16a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -47,7 +47,13 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); } + int SetConstArgs() override { + if 
(ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; + } void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc index c0dbd556b05..105b5abb051 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc @@ -55,7 +55,7 @@ int BatchToSpaceNDOpenCLKernel::CheckSpecs() { return RET_OK; } -void BatchToSpaceNDOpenCLKernel::SetConstArgs() { +int BatchToSpaceNDOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(this->op_parameter_); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); @@ -66,10 +66,23 @@ void BatchToSpaceNDOpenCLKernel::SetConstArgs() { cl_int4 paddings = {param->crops_[0], param->crops_[1], param->crops_[2], param->crops_[3]}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { @@ -82,9 +95,9 @@ 
void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { } int BatchToSpaceNDOpenCLKernel::Prepare() { - std::string kernel_name = "batch_to_space_nd_NHWC4"; + const std::string kernel_name = "batch_to_space_nd_NHWC4"; std::string source = batch_to_space_nd_source; - std::string program_name = "batch_to_space_nd"; + const std::string program_name = "batch_to_space_nd"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -96,16 +109,28 @@ int BatchToSpaceNDOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int BatchToSpaceNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h index aeeced68781..df756af6778 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h @@ -32,7 +32,7 @@ class BatchToSpaceNDOpenCLKernel : public OpenCLKernel { int Prepare() override; int 
CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index b135ed41c3d..56577306bbe 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -59,15 +59,25 @@ void BatchNormGetWorkGroup(const std::vector &global, std::vectorpush_back(z); } -void BatchNormOpenCLKernel::SetConstArgs() { +int BatchNormOpenCLKernel::SetConstArgs() { int arg_cn = 6; auto param = reinterpret_cast(this->op_parameter_); auto input0_shape = in_tensors_.at(0)->shape(); cl_int4 input_shape_ = {input0_shape.at(0), input0_shape.at(1), input0_shape.at(2), UP_DIV(input0_shape.at(3), C4NUM)}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void BatchNormOpenCLKernel::SetGlobalLocal() { @@ -83,6 +93,41 @@ void BatchNormOpenCLKernel::SetGlobalLocal() { OpenCLKernel::AlignGlobalLocal(global_size_, local_size_); } +int BatchNormOpenCLKernel::UnmapBuffer() { + auto allocator = ocl_runtime_->GetAllocator(); + if (allocator->UnmapBuffer(scale_) != RET_OK) { + return RET_ERROR; + } + if (allocator->UnmapBuffer(offset_) != RET_OK) { + return RET_ERROR; + } + if 
(allocator->UnmapBuffer(mean_) != RET_OK) { + return RET_ERROR; + } + if (allocator->UnmapBuffer(variance_) != RET_OK) { + return RET_ERROR; + } + return RET_OK; +} + +int BatchNormOpenCLKernel::MapBuffer() { + auto allocator = ocl_runtime_->GetAllocator(); + if (allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + if (allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + if (allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + if (allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + + return RET_OK; +} + int BatchNormOpenCLKernel::Initweight() { auto allocator = ocl_runtime_->GetAllocator(); GpuTensorInfo img_info(in_tensors_.at(1)); @@ -90,15 +135,30 @@ int BatchNormOpenCLKernel::Initweight() { size_t weight_size = img_info.OriginSize; // allocated memory for weight and init value scale_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (scale_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (offset_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } mean_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (mean_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } variance_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (variance_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } - allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true); - allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true); - allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true); - allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true); - + if (MapBuffer() != RET_OK) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(scale_, 1, weight_size); 
memset(offset_, 0x00, weight_size); memset(mean_, 0x00, weight_size); @@ -153,18 +213,18 @@ int BatchNormOpenCLKernel::Initweight() { memcpy(variance_, in_tensors_.at(4)->data_c(), weight_size); } } - allocator->UnmapBuffer(scale_); - allocator->UnmapBuffer(offset_); - allocator->UnmapBuffer(mean_); - allocator->UnmapBuffer(variance_); + if (UnmapBuffer() != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } int BatchNormOpenCLKernel::Prepare() { use_fp16_enable_ = ocl_runtime_->GetFp16Enable(); - std::string kernel_name = "Batch_normalization_NHWC4"; + const std::string kernel_name = "Batch_normalization_NHWC4"; std::string source = batchnorm_source; - std::string program_name = "Batch_normalization"; + const std::string program_name = "Batch_normalization"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -181,7 +241,10 @@ int BatchNormOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Initweight failed "; return RET_ERROR; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; @@ -190,13 +253,34 @@ int BatchNormOpenCLKernel::Prepare() { int BatchNormOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF); // scale - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF); // offset - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF); // variance - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // input tensor + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // scale + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // offset + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // mean + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // variance + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // out tensor + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h index 80b217febba..7f7b90710d5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h @@ -32,11 +32,13 @@ class BatchNormOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: int Initweight(); + int UnmapBuffer(); + int MapBuffer(); private: bool use_fp16_enable_{false}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc index b022b270417..08e24d4fd68 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc @@ -52,9 +52,13 @@ int CastOpenCLKernel::CheckSpecs() { return RET_OK; } -void CastOpenCLKernel::SetConstArgs() { +int CastOpenCLKernel::SetConstArgs() { cl_int2 shape = {static_cast(shape_.width), static_cast(shape_.height)}; - ocl_runtime_->SetKernelArg(kernel_, 2, shape); + if (ocl_runtime_->SetKernelArg(kernel_, 2, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void CastOpenCLKernel::SetGlobalLocal() { @@ -68,8 +72,8 @@ int CastOpenCLKernel::Prepare() { {kNumberTypeFloat32, "fp32"}, {kNumberTypeFloat16, "fp16"}, }; - std::string program_name = "Cast"; - std::string kernel_name = + const std::string program_name = "Cast"; + const std::string kernel_name = "Cast_" + dtype_names[in_tensors_.front()->data_type()] + "_to_" + dtype_names[out_tensors_.front()->data_type()]; if (!ocl_runtime_->LoadSource(program_name, cast_source)) { MS_LOG(ERROR) << "Load source failed."; @@ -80,16 +84,28 @@ int CastOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() 
!= RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } int CastOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h index 3db1f15a008..68fc43cd6c9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h @@ -31,7 +31,7 @@ class CastOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 6beebbfbe29..05a986da862 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -38,7 +38,10 @@ int ConcatOpenCLKernel::RunAxis0() { auto *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { auto src_data = weight_ptrs_.at(i) == nullptr ? 
in_tensors_[i]->data_c() : weight_ptrs_.at(i); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; auto *input_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -107,7 +110,7 @@ int ConcatOpenCLKernel::CheckSpecs() { return RET_OK; } -void ConcatOpenCLKernel::SetConstArgs() { +int ConcatOpenCLKernel::SetConstArgs() { GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = ocl_runtime_->GetFp16Enable() ? sizeof(cl_half) : sizeof(cl_float); stride_w = img_info.RowPitch() / dtype; @@ -124,9 +127,15 @@ void ConcatOpenCLKernel::SetConstArgs() { temp.s[j] = in_tensor->shape()[j]; } Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); } else { for (auto &in_tensor : in_tensors_) { cl_int4 temp = {}; @@ -135,11 +144,18 @@ void ConcatOpenCLKernel::SetConstArgs() { } Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg 
failed."; + return RET_ERROR; + } + return RET_OK; } void ConcatOpenCLKernel::SetGlobalLocal() { @@ -190,6 +206,10 @@ int ConcatOpenCLKernel::ConvertWeightToTensor() { } ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -222,7 +242,7 @@ int ConcatOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = concat_source; - std::string program_name = "Concat"; + const std::string program_name = "Concat"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -234,7 +254,10 @@ int ConcatOpenCLKernel::Prepare() { return ret; } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } @@ -247,14 +270,27 @@ int ConcatOpenCLKernel::Run() { int arg_cn = 0; for (int i = 0; i < in_tensors_.size(); ++i) { auto input_ptr = weight_ptrs_.at(i) == nullptr ? 
in_tensors_[i]->data_c() : weight_ptrs_.at(i); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } if (axis_ == 3 && !Align_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h index 9b3ffae6bb4..363888eaf2c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h @@ -31,7 +31,7 @@ class ConcatOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index 26f77796123..bfed62a5129 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -108,7 +108,10 @@ int Conv2DOpenCLKernel::Prepare() { return ret; } 
SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } return RET_OK; } @@ -142,7 +145,7 @@ void Conv2DOpenCLKernel::InitAttrs() { int Conv2DOpenCLKernel::BuildKernel() { SetBlockSize(); - std::string program_name = "conv2d"; + const std::string program_name = "conv2d"; std::stringstream kernel_name; kernel_name << "Conv2D_H" << block_size_.H << "W" << block_size_.W << "C" << block_size_.C; if (filter_type_ == MemType::IMG) { @@ -245,9 +248,11 @@ void Conv2DOpenCLKernel::SetMaliFp16BlockSize(int task_size_per_cu, bool w_kerne } int Conv2DOpenCLKernel::InitWeights() { - InitFilter(); + if (InitFilter() != RET_OK) { + return RET_ERROR; + } if (has_bias_) { - InitBias(); + return InitBias(); } return RET_OK; } @@ -300,7 +305,7 @@ void ConvertFilter(void *src, void *dst, TypeId src_dtype, TypeId dst_dtype, Fil } } -void Conv2DOpenCLKernel::InitFilter() { +int Conv2DOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate opencl memory: buffer or image2d @@ -312,9 +317,17 @@ void Conv2DOpenCLKernel::InitFilter() { size_t dtype = use_fp16_ ? 
CL_HALF_FLOAT : CL_FLOAT; size = width * height * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc({width, height, dtype}); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { size = UP_DIV(CO_SLICES_, Ogroup) * KH_ * KW_ * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc(size, lite::opencl::MemType::BUF); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } // rearrange filter @@ -333,15 +346,22 @@ void Conv2DOpenCLKernel::InitFilter() { if (filter_type_ == MemType::IMG) { ocl_runtime_->WriteImage(packed_filter_, tmp.data()); } else { - allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memcpy(packed_filter_, tmp.data(), size); - allocator->UnmapBuffer(packed_filter_); + if (allocator->UnmapBuffer(packed_filter_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } FreeStoredData(stored_filter_); + return RET_OK; } -void Conv2DOpenCLKernel::InitBias() { +int Conv2DOpenCLKernel::InitBias() { auto allocator = ocl_runtime_->GetAllocator(); // align bias from C to C4 @@ -349,8 +369,15 @@ void Conv2DOpenCLKernel::InitBias() { void *src_data = stored_bias_ == nullptr ? 
bias_tensor->data_c() : stored_bias_; size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_; packed_bias_ = allocator->Malloc(packed_bias_size, lite::opencl::MemType::BUF); + if (packed_bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } - allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(packed_bias_, 0x00, packed_bias_size); if (bias_tensor->data_type() == kNumberTypeFloat16) { if (use_fp16_) { @@ -375,11 +402,15 @@ void Conv2DOpenCLKernel::InitBias() { memcpy(packed_bias_, src_data, CO_ * sizeof_FLT_); } } - allocator->UnmapBuffer(packed_bias_); + if (allocator->UnmapBuffer(packed_bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); + return RET_OK; } -void Conv2DOpenCLKernel::SetConstArgs() { +int Conv2DOpenCLKernel::SetConstArgs() { cl_int4 input_shape = {batch_size_, IH_, IW_, CI_SLICES_}; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; cl_int4 kernel_stride = {KH_, KW_, param_->stride_h_, param_->stride_w_}; @@ -387,15 +418,43 @@ void Conv2DOpenCLKernel::SetConstArgs() { cl_int2 dilation = {param_->dilation_h_, param_->dilation_w_}; int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_); + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void Conv2DOpenCLKernel::SetGlobalLocal() { @@ -429,9 +488,18 @@ void Conv2DOpenCLKernel::SetGlobalLocal() { int Conv2DOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, 
out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h index f12ec7124f7..751b960774a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h @@ -53,7 +53,7 @@ class Conv2DOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; @@ -78,8 +78,8 @@ class Conv2DOpenCLKernel : public OpenCLKernel { protected: void InitAttrs(); virtual int BuildKernel(); - virtual void InitFilter(); - void InitBias(); + virtual int InitFilter(); + int InitBias(); bool use_fp16_{false}; size_t sizeof_FLT_{4}; ConvParameter *param_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index c3a5d528ecb..16bd63384c5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -55,10 +55,10 @@ int Conv2dTransposeOpenCLKernel::CheckSpecs() { } int Conv2dTransposeOpenCLKernel::Prepare() { - std::string kernel_name = "conv2d_transpose"; + const std::string kernel_name = "conv2d_transpose"; enable_fp16_ = ocl_runtime_->GetFp16Enable(); std::string source = GetActDefines() + conv2d_transpose_source; - std::string program_name = "conv2d_transpose"; + const std::string program_name = "conv2d_transpose"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source 
failed."; return RET_ERROR; @@ -74,7 +74,10 @@ int Conv2dTransposeOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -94,7 +97,7 @@ void Conv2dTransposeOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void Conv2dTransposeOpenCLKernel::SetConstArgs() { +int Conv2dTransposeOpenCLKernel::SetConstArgs() { int arg_cnt = 2; auto *param = reinterpret_cast(op_parameter_); int ci = in_tensors_[0]->shape()[3]; @@ -115,14 +118,39 @@ void Conv2dTransposeOpenCLKernel::SetConstArgs() { cl_int2 padding = {pad_h, pad_w}; cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), n}; cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), n}; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast(param->act_type_)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast(param->act_type_)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int Conv2dTransposeOpenCLKernel::InitWeights() { @@ -147,7 +175,15 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { // IHWO to OHWI4(I)4(O)(converter format is IHWO) // init padWeight_(buffer mem) padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); auto origin_weight = stored_weight_ == nullptr ? 
in_tensors_.at(kWeightIndex)->data_c() : stored_weight_; auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); @@ -188,7 +224,10 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { } } } - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_weight_); return RET_OK; } @@ -208,7 +247,15 @@ int Conv2dTransposeOpenCLKernel::InitBias() { } ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(bias_, 0x00, div_co * C4NUM * data_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; @@ -225,7 +272,10 @@ int Conv2dTransposeOpenCLKernel::InitBias() { memcpy(bias_, src_data, co * data_size); } } - allocator->UnmapBuffer(bias_); + if (allocator->UnmapBuffer(bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); return RET_OK; } @@ -233,9 +283,18 @@ int Conv2dTransposeOpenCLKernel::InitBias() { int Conv2dTransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_cnt = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << 
"SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h index 70caeb50ced..b709dee59b0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h @@ -34,7 +34,7 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel { int InitWeights() override; int InitFilter(); int InitBias(); - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 7e9f7f7b572..73733bafd20 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -73,7 +73,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { } else { block_size_.C = block_size_.H = block_size_.W = 1; } - std::string program_name = "DepthwiseConv2d"; + const std::string program_name = "DepthwiseConv2d"; std::string source = depthwise_conv2d_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -94,7 +94,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SetConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast(out_mem_type_); return RET_OK; } @@ -153,10 +156,12 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { size_t img_dtype = ocl_runtime_->GetFp16Enable() ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype}; packed_weight_ = allocator->Malloc(img_size, temp_filter.data()); + } else { packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data()); } if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; return RET_ERROR; } FreeStoredData(stored_weight_); @@ -199,13 +204,15 @@ int DepthwiseConv2dOpenCLKernel::InitBias() { } bias_data_ = allocator->Malloc(bias_size, temp_bias.data()); if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; return RET_ERROR; } + FreeStoredData(stored_bias_); return RET_OK; } -void DepthwiseConv2dOpenCLKernel::SetConstArgs() { +int DepthwiseConv2dOpenCLKernel::SetConstArgs() { auto parameter = reinterpret_cast(op_parameter_); auto in_info = GpuTensorInfo(in_tensors_[0]); auto out_info = GpuTensorInfo(out_tensors_[0]); @@ -222,16 +229,47 @@ void DepthwiseConv2dOpenCLKernel::SetConstArgs() { cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() { @@ -286,9 +324,18 @@ int DepthwiseConv2dOpenCLKernel::StoreConstData() { int DepthwiseConv2dOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + 
return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h index 8fdbed9d1bd..91626bb9606 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h @@ -41,7 +41,7 @@ class DepthwiseConv2dOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int InitWeights() override; int InitBias(); - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc index a42d0f9b9d1..dac1c248bcf 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc @@ -35,7 +35,10 @@ int FillOpenCLKernel::RunFill() { cl_int4 fill_value = {}; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; auto src_data = out_tensors_[0]->data_c(); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -59,7 +62,7 @@ int FillOpenCLKernel::RunShape() { return RET_OK; } -void FillOpenCLKernel::SetConstArgs() {} +int FillOpenCLKernel::SetConstArgs() { 
return RET_OK; } void FillOpenCLKernel::SetGlobalLocal() {} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h index e60da1d447a..0828414c7b6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h @@ -31,7 +31,7 @@ class FillOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index 00971e0b5fa..f86b979bf9c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -98,7 +98,7 @@ int FullConnectionOpenCLKernel::Prepare() { kernel_name = "FullConnectionWeightVar"; } std::string source = fullconnection_source; - std::string program_name = "FullConnection"; + const std::string program_name = "FullConnection"; if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -113,7 +113,10 @@ int FullConnectionOpenCLKernel::Prepare() { if (ret != RET_OK) { return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -137,7 +140,15 @@ int FullConnectionOpenCLKernel::InitFilter() { size_t dtype_size = enable_fp16_ ? 
sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size); @@ -183,7 +194,10 @@ int FullConnectionOpenCLKernel::InitFilter() { } } } - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_weight_); return RET_OK; } @@ -202,7 +216,15 @@ int FullConnectionOpenCLKernel::InitBias() { } ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(bias_, 0x00, co4 * C4NUM * dtype_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? 
in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; @@ -218,7 +240,10 @@ int FullConnectionOpenCLKernel::InitBias() { memcpy(bias_, src_data, CO_ * dtype_size); } } - allocator->UnmapBuffer(bias_); + if (allocator->UnmapBuffer(bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); return RET_OK; } @@ -231,22 +256,44 @@ void FullConnectionOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void FullConnectionOpenCLKernel::SetConstArgs() { +int FullConnectionOpenCLKernel::SetConstArgs() { if (!weight_var_) { - ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } int arg_count = 3; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto intensor_shape = GpuTensorInfo(in_tensors_[0]); int CI4 = CI_remainder_ * intensor_shape.Slice; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto in_shape_info = GpuTensorInfo(in_tensors_[0]); cl_int2 in_img_shape = {static_cast(in_shape_info.height), static_cast(in_shape_info.width)}; - ocl_runtime_->SetKernelArg(kernel_, 
arg_count++, in_img_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto *param = reinterpret_cast(op_parameter_); - ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast(param->act_type_)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast(param->act_type_)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int FullConnectionOpenCLKernel::StoreConstData() { @@ -270,12 +317,24 @@ int FullConnectionOpenCLKernel::StoreConstData() { int FullConnectionOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_count = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - if (weight_var_) { - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (weight_var_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h index be830de30ee..09bc05d2f74 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h @@ -31,7 +31,7 @@ class FullConnectionOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc index f96d4583eb1..faaa7e81a00 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc @@ -164,8 +164,8 @@ bool IsEltwiseAndOperatorSupported(LiteKernel *node) { int FusionEltwiseOpenCLKernel::Prepare() { std::string source = Codegen(); - std::string program_name = "FusionEltwise\n" + source; - std::string kernel_name = "FusionEltwise"; + const std::string program_name = "FusionEltwise\n" + source; + const std::string kernel_name = "FusionEltwise"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -183,7 +183,10 @@ int FusionEltwiseOpenCLKernel::Prepare() { } InitWeights(); SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } @@ -217,7 +220,14 @@ int FusionEltwiseOpenCLKernel::InitWeights() { size_t num = tensor_info.ElementsNum; size_t size = tensor_info.Image2DSize; void *buffer = allocator->Malloc(size, lite::opencl::MemType::BUF); - allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true); + if (buffer == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return 
RET_ERROR; + } memset(buffer, 0x00, size); if (tensor->data_type() == kNumberTypeFloat16) { if (use_fp16) { @@ -232,7 +242,10 @@ int FusionEltwiseOpenCLKernel::InitWeights() { CopyNumber(buffer, tensor->data_c(), num); } } - allocator->UnmapBuffer(buffer); + if (allocator->UnmapBuffer(buffer) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } buffer_weights_.push_back(buffer); } } @@ -247,7 +260,7 @@ void FusionEltwiseOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void FusionEltwiseOpenCLKernel::SetConstArgs() { +int FusionEltwiseOpenCLKernel::SetConstArgs() { auto output = GpuTensorInfo(out_tensors_.front()); cl_int4 output_shape = {static_cast(output.N), static_cast(output.H), static_cast(output.W), static_cast(output.C)}; @@ -260,18 +273,32 @@ void FusionEltwiseOpenCLKernel::SetConstArgs() { if (IsScalar(in_tensor->shape())) { if (ocl_runtime_->GetFp16Enable()) { auto value = static_cast(scalar_weights_[scalar_idx++]); - ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast(&value))); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast(&value))) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } arg_idx++; // for act input } arg_idx++; // for output - ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != 
CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int FusionEltwiseOpenCLKernel::Run() { @@ -279,12 +306,21 @@ int FusionEltwiseOpenCLKernel::Run() { int arg_idx = 0; for (auto *in_tensor : in_tensors_) { if (!in_tensor->IsConst()) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } arg_idx++; } - ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h index 800c1aa4c0a..b585273cfad 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h @@ -162,7 +162,7 @@ class FusionEltwiseOpenCLKernel : public OpenCLKernel { int Prepare() override; int InitWeights() override; void SetGlobalLocal() override; - void SetConstArgs() override; + int SetConstArgs() override; int Run() override; void ClearParameter() { op_parameter_ = nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index 251c0df94c1..68dbaf98b4b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -81,7 +81,7 @@ int 
GatherOpenCLKernel::CheckSpecs() { } } -void GatherOpenCLKernel::SetConstArgs() { +int GatherOpenCLKernel::SetConstArgs() { auto input = GpuTensorInfo(in_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front()); int indices_num = in_tensors_.at(1)->ElementsNum(); @@ -90,10 +90,23 @@ void GatherOpenCLKernel::SetConstArgs() { cl_int4 dst_size = {static_cast(output.W), static_cast(output.H), static_cast(output.Slice), static_cast(output.N)}; int arg_cnt = 3; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void GatherOpenCLKernel::SetGlobalLocal() { @@ -104,11 +117,11 @@ void GatherOpenCLKernel::SetGlobalLocal() { } int GatherOpenCLKernel::Prepare() { - std::string kernel_name = "gather"; + const std::string kernel_name = "gather"; if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) { axis_ = 3; } - std::string program_name = "gather"; + const std::string program_name = "gather"; if (!ocl_runtime_->LoadSource(program_name, gather_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -127,7 +140,10 @@ int GatherOpenCLKernel::Prepare() { } } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + 
return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -135,11 +151,21 @@ int GatherOpenCLKernel::Prepare() { int GatherOpenCLKernel::ConvertTensorToweight() { auto allocator = ocl_runtime_->GetAllocator(); auto indices_tensor = in_tensors_.at(1); - allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto indices_num = indices_tensor->ElementsNum(); indices_data_ = reinterpret_cast(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF)); - allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); + if (indices_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } if (indices_data_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; @@ -155,8 +181,14 @@ int GatherOpenCLKernel::ConvertTensorToweight() { << " But Your type is :" << data_type; return RET_ERROR; } - allocator->UnmapBuffer(indices_data_); - allocator->UnmapBuffer(indices_tensor->data_c()); + if (allocator->UnmapBuffer(indices_data_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } + if (allocator->UnmapBuffer(indices_tensor->data_c()) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } @@ -197,7 +229,10 @@ int GatherOpenCLKernel::PreProcess() { if (!InferShapeDone()) { auto indices_tensor = in_tensors_[1]; if (!indices_tensor->IsConst()) { - ocl_runtime_->SyncCommandQueue(); + if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + return RET_ERROR; + } indices_tensor->MutableData(); } } @@ -209,10 +244,22 @@ int GatherOpenCLKernel::Run() { if 
(intensor1_is_tensor) { ConvertTensorToweight(); } - ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index 5ec2047f2d0..78f3e2d531b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -34,7 +34,7 @@ class GatherOpenCLKernel : public OpenCLKernel { int PreProcess() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int ConvertTensorToweight(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc index b803bae593e..74504b8e983 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc @@ -98,6 +98,10 @@ int 
ArithmeticInt8OpenCLKernel::InitWeights() { size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -106,7 +110,7 @@ int ArithmeticInt8OpenCLKernel::InitWeights() { return RET_OK; } -void ArithmeticInt8OpenCLKernel::SetConstArgs() { +int ArithmeticInt8OpenCLKernel::SetConstArgs() { int arg_idx = 3; if (!element_flag_) { cl_int4 in0_shape = {static_cast(in0_shape_.N), static_cast(in0_shape_.H), static_cast(in0_shape_.W), @@ -121,16 +125,37 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) { broadcastC_flag = 2; // BroadCast C4 in input1 } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { cl_int2 output_shape{static_cast(global_range_[0]), static_cast(global_range_[1])}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) 
<< "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_); // set quantization parameter. auto input0_quant_param = in_tensors_[0]->quant_params().front(); @@ -141,8 +166,15 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() { cl_char4 zero_point = {static_cast(input0_quant_param.zeroPoint), static_cast(input1_quant_param.zeroPoint), static_cast(output_quant_param.zeroPoint), 0}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); // scale - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point); // zero_point + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // scale + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // zero_point + return RET_OK; } int ArithmeticInt8OpenCLKernel::Prepare() { @@ -191,7 +223,7 @@ int ArithmeticInt8OpenCLKernel::Prepare() { activation_max_ = 6.f; } - std::string program_name = "Arithmetic"; + const std::string program_name = "Arithmetic"; std::string source = arithmetic_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -207,7 +239,10 @@ int ArithmeticInt8OpenCLKernel::Prepare() { if (type() != PrimitiveType_BiasAdd) { InitWeights(); } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; return RET_OK; } @@ -218,10 +253,22 @@ int 
ArithmeticInt8OpenCLKernel::Run() { auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1]; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h index 667ea8f4763..3f8feb78749 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h @@ -33,7 +33,7 @@ class ArithmeticInt8OpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc index 08f552c8d34..ea3599de657 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc @@ 
-67,15 +67,31 @@ void LayerNormGetWorkGroup(const std::vector &global, std::vectorpush_back(z); } -void LayerNormOpenCLKernel::SetConstArgs() { +int LayerNormOpenCLKernel::SetConstArgs() { int arg_cn = 6; GpuTensorInfo img_info(in_tensors_.at(0)); in_shape_.s[0] = img_info.N, in_shape_.s[1] = img_info.H, in_shape_.s[2] = img_info.W, in_shape_.s[3] = img_info.C; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_); - ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_); - ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void AlignMeanVarGlobalLocal(const std::vector &global, const std::vector &local, cl::NDRange *global_range, @@ -106,9 +122,23 @@ int LayerNormOpenCLKernel::Initweight() { size_t weight_size = img_info.Image2DSize; // allocated memory for weight and init value gamma_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (gamma_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } beta_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - allocator->MapBuffer(gamma_, CL_MAP_WRITE, 
nullptr, true); - allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true); + if (beta_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(gamma_, 0x01, weight_size); memset(beta_, 0x00, weight_size); @@ -143,8 +173,14 @@ int LayerNormOpenCLKernel::Initweight() { memcpy(beta_, in_tensors_.at(2)->data_c(), weight_size); } } - allocator->UnmapBuffer(gamma_); - allocator->UnmapBuffer(beta_); + if (allocator->UnmapBuffer(gamma_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } + if (allocator->UnmapBuffer(beta_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } @@ -164,11 +200,19 @@ int LayerNormOpenCLKernel::Prepare() { size_t size_dtype = use_fp16_enable_ ? 
sizeof(float16_t) : sizeof(float); mean_size *= size_dtype; mean_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF); + if (mean_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } var_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF); - std::string kernel_name = "LayerNormalization_NHWC4"; + if (var_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + const std::string kernel_name = "LayerNormalization_NHWC4"; std::string kernel_name_mean_var = "ComputeMeanVar"; std::string source = layer_norm_source; - std::string program_name = "LayerNormalization"; + const std::string program_name = "LayerNormalization"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -182,7 +226,10 @@ int LayerNormOpenCLKernel::Prepare() { kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4"; ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext); MS_LOG(DEBUG) << kernel_name << " Init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; @@ -191,21 +238,48 @@ int LayerNormOpenCLKernel::Prepare() { int LayerNormOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; int arg1_cn = 0; - ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF); // mean_ - ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF); // var_ return RET_OK; + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // input tensor + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } ocl_runtime_->RunKernel(kernel_mean_var_, global_mean_var_, local_mean_var_, nullptr, &event_); int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean_ - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF); // var_ - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF); // gamma_ - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF); // beta_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // input tensor + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // out tensor + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, 
lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // mean_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // var_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // gamma_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // beta_ ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; -} +} // namespace mindspore::kernel REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, OpenCLKernelCreator) REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, OpenCLKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h index 67f40e01ad0..ca432abca14 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h @@ -31,7 +31,7 @@ class LayerNormOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 3815743c0c4..dc5b5b6cd51 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -84,7 +84,7 @@ int MatMulOpenCLKernel::Prepare() { std::map dims2str = {{2, "_2d"}, {3, "_4d"}, {4, "_4d"}}; kernel_name += dims2str[dims]; std::string source = matmul_source; - std::string 
program_name = "MatMul"; + const std::string program_name = "MatMul"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -95,13 +95,16 @@ int MatMulOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int co) { +int MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int co) { auto allocator = ocl_runtime_->GetAllocator(); int a = weight_shape_4d[0]; int b = weight_shape_4d[1]; @@ -109,7 +112,15 @@ void MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int int co4 = UP_DIV(co, C4NUM); size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size); @@ -157,6 +168,7 @@ void MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int } } } + return RET_OK; } int MatMulOpenCLKernel::InitWeights() { @@ -185,7 +197,10 @@ int MatMulOpenCLKernel::InitWeights() { PadWeight(weight_shape_4d, ci, CO_); - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_weight_); return InitBias(); } @@ -204,7 +219,15 @@ int 
MatMulOpenCLKernel::InitBias() { } lite::opencl::ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(bias_, 0x00, co4 * C4NUM * dtype_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; @@ -220,7 +243,10 @@ int MatMulOpenCLKernel::InitBias() { memcpy(bias_, src_data, CO_ * dtype_size); } } - allocator->UnmapBuffer(bias_); + if (allocator->UnmapBuffer(bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); return RET_OK; } @@ -235,29 +261,54 @@ void MatMulOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void MatMulOpenCLKernel::SetConstArgs() { +int MatMulOpenCLKernel::SetConstArgs() { int arg_count = 2; cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; if (act_weight_) { arg_count++; } else { - ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } - ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg 
failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int MatMulOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_count = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - if (act_weight_) { - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (act_weight_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h index 54aee868ba4..02c62986c18 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h @@ -32,7 +32,7 @@ class MatMulOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int InitBias(); @@ -54,7 +54,7 @@ class MatMulOpenCLKernel : public OpenCLKernel { 
std::vector outShape{std::vector(MAX_DIMS, 1)}; private: - void PadWeight(std::vector weight_shape_4d, int ci, int co); + int PadWeight(std::vector weight_shape_4d, int ci, int co); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc index f6f231c1605..fe128cf5c49 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc @@ -48,7 +48,7 @@ int OneHotOpenCLKernel::Prepare() { kernel_name += "Axis" + std::to_string(axis_); } std::string source = one_hot_source; - std::string program_name = "OneHot"; + const std::string program_name = "OneHot"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -65,7 +65,10 @@ int OneHotOpenCLKernel::Prepare() { return ret; } InitWeights(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -87,18 +90,40 @@ int OneHotOpenCLKernel::InitWeights() { return RET_OK; } -void OneHotOpenCLKernel::SetConstArgs() { +int OneHotOpenCLKernel::SetConstArgs() { cl_int2 cl_in_image2d_shape = {static_cast(in_shape_.width), static_cast(in_shape_.height)}; cl_int4 cl_out_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), static_cast(out_shape_.Slice)}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(out_shape_.C)); - ocl_runtime_->SetKernelArg(kernel_, arg_idx, 
static_cast(param_->support_neg_index_)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(out_shape_.C)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast(param_->support_neg_index_)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void OneHotOpenCLKernel::SetGlobalLocal() { local_size_ = {}; @@ -108,9 +133,18 @@ void OneHotOpenCLKernel::SetGlobalLocal() { int OneHotOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + 
MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h index 7efcc4e556f..add5beaf7bd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h @@ -33,7 +33,7 @@ class OneHotOpenCLKernel : public OpenCLKernel { int Prepare() override; int InitWeights() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc index fee30266b16..3cd6fdd054f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc @@ -81,11 +81,14 @@ int PadOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } -void PadOpenCLKernel::SetConstArgs() { +int PadOpenCLKernel::SetConstArgs() { auto input = GpuTensorInfo(in_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front()); cl_int4 input_shape = {static_cast(input.N), static_cast(input.H), static_cast(input.W), @@ -105,20 +108,45 @@ void PadOpenCLKernel::SetConstArgs() { Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0); int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg 
failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } local_size_ = {8, 4, 1}; global_size_ = {output.N * output.H, output.W, output.Slice}; AlignGlobalLocal(global_size_, local_size_); + return RET_OK; } int PadOpenCLKernel::Run() { - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h index 4464241d1d6..3752982727d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h @@ -35,7 +35,7 @@ class PadOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; - void SetConstArgs() override; + int SetConstArgs() 
override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 668863226b8..01a90630d5d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -73,7 +73,7 @@ int PoolingOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; kernel_name += "_IMG"; std::string source = pooling2d_source; - std::string program_name = "Pooling2d"; + const std::string program_name = "Pooling2d"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -84,7 +84,10 @@ int PoolingOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; @@ -100,7 +103,7 @@ void PoolingOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void PoolingOpenCLKernel::SetConstArgs() { +int PoolingOpenCLKernel::SetConstArgs() { int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); cl_int4 input_shape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], slices}; cl_int4 output_shape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2], @@ -109,19 +112,44 @@ void PoolingOpenCLKernel::SetConstArgs() { cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding); + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int PoolingOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h index e47b34b1bf0..68a67f7fa56 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h @@ -32,7 +32,7 @@ class PoolingOpenCLKernel : public OpenCLKernel { int Run() override; int 
Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc index 817c6aaeeaf..b9d8890fb5c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc @@ -63,15 +63,21 @@ void PowerGetWorkGroup(const std::vector &global, std::vector *l local->push_back(z); } -void PowerOpenCLKernel::SetConstArgs() { +int PowerOpenCLKernel::SetConstArgs() { float unalign_w = static_cast(out_shape_.s[3]); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); int arg_cn = 2; if (!broadcast_) { arg_cn++; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } if (use_fp16_enable_) { auto x = static_cast(power_); @@ -80,11 +86,18 @@ void PowerOpenCLKernel::SetConstArgs() { auto w = static_cast(unalign_w); cl_half4 parameter = {*(reinterpret_cast(&x)), *(reinterpret_cast(&y)), *(reinterpret_cast(&z)), *(reinterpret_cast(&w))}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { cl_float4 parameter = {power_, shift_, scale_, unalign_w}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void 
PowerOpenCLKernel::SetGlobalLocal() { @@ -111,7 +124,7 @@ int PowerOpenCLKernel::Prepare() { auto param = reinterpret_cast(this->op_parameter_); std::string kernel_name = "power"; std::string source = power_source; - std::string program_name = "power"; + const std::string program_name = "power"; if (broadcast_) { power_ = param->power_; kernel_name += "_broadcast"; @@ -130,7 +143,10 @@ int PowerOpenCLKernel::Prepare() { } MS_LOG(DEBUG) << kernel_name << " Init Done!"; SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } @@ -138,13 +154,28 @@ int PowerOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; int arg_cn = 0; if (broadcast_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h index 71934bd7b92..ea36486b0a5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h @@ -30,7 +30,7 @@ class PowerOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 9e7f08a1510..2784f06b708 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -46,7 +46,14 @@ int PReluOpenCLKernel::InitWeights() { auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float); size_t weight_size = UP_ROUND(C_, C4NUM) * sizeof_FLT; weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); + if (weight_vector_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(weight_vector_, 0x00, weight_size); if (weight_tensor->data_type() == kNumberTypeFloat16) { if (enable_fp16_) { @@ -69,7 +76,10 @@ int PReluOpenCLKernel::InitWeights() { memcpy(weight_vector_, weight_tensor->data_c(), C_ * sizeof_FLT); } } - allocator->UnmapBuffer(weight_vector_); + if (allocator->UnmapBuffer(weight_vector_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } return RET_OK; } @@ -95,11 +105,18 @@ int PReluOpenCLKernel::CheckSpecs() { return RET_OK; } -void PReluOpenCLKernel::SetConstArgs() { +int PReluOpenCLKernel::SetConstArgs() { int arg_idx = 3; 
out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void PReluOpenCLKernel::SetGlobalLocal() { @@ -126,8 +143,8 @@ int PReluOpenCLKernel::Prepare() { weight_is_scalar = param->channelShared; enable_fp16_ = ocl_runtime_->GetFp16Enable(); std::string source = prelu_source; - std::string program_name = "PRelu"; - std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); + const std::string program_name = "PRelu"; + const std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -141,7 +158,10 @@ int PReluOpenCLKernel::Prepare() { InitWeights(); MS_LOG(DEBUG) << program_name << " init Done!"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name << " init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } @@ -149,12 +169,24 @@ int PReluOpenCLKernel::Prepare() { int PReluOpenCLKernel::Run() { MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + 
MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (weight_is_scalar) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ret != mindspore::lite::RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h index 739149eee49..b6e6d3de247 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h @@ -31,7 +31,7 @@ class PReluOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; int InitWeights() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index 237820dc37f..4186f6911c7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include "include/errorcode.h" #include "src/kernel_registry.h" #include "src/runtime/kernel/opencl/kernel/reduce.h" @@ -179,7 +180,7 @@ int ReduceOpenCLKernel::Prepare() { } kernel_name += GetReduceTypeStr(reduce_param->mode_); std::string source = reduce_source; - std::string program_name = "Reduce"; + const std::string program_name = "Reduce"; if 
(!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -190,22 +191,32 @@ int ReduceOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void ReduceOpenCLKernel::SetConstArgs() { +int ReduceOpenCLKernel::SetConstArgs() { int h = inShape.H; int w = inShape.W; int c = inShape.C; int c4 = UP_DIV(c, C4NUM); cl_int4 size = {h, w, c4, c}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); - if (wc_reduce_ || c_reduce_) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } + if (wc_reduce_ || c_reduce_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + return RET_OK; } void ReduceOpenCLKernel::SetGlobalLocal() { int h = inShape.H; @@ -235,9 +246,18 @@ int ReduceOpenCLKernel::Tune() { int ReduceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) 
!= RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h index 2d359a19ee7..ae70347aaa0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h @@ -32,7 +32,7 @@ class ReduceOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 79116366827..b343ecc5ed2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -53,15 +53,22 @@ int ReshapeOpenCLKernel::CheckSpecs() { return RET_OK; } -void ReshapeOpenCLKernel::SetConstArgs() { +int ReshapeOpenCLKernel::SetConstArgs() { auto in = GpuTensorInfo(in_tensors_.front()); auto out = GpuTensorInfo(out_tensors_.front()); cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)}; cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ReshapeOpenCLKernel::SetGlobalLocal() { @@ -72,9 +79,9 @@ void ReshapeOpenCLKernel::SetGlobalLocal() { } int ReshapeOpenCLKernel::Prepare() { - std::string kernel_name = 
"reshape_NHWC4"; + const std::string kernel_name = "reshape_NHWC4"; std::string source = reshape_source; - std::string program_name = "reshape"; + const std::string program_name = "reshape"; auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -87,16 +94,28 @@ int ReshapeOpenCLKernel::Prepare() { } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int ReshapeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } @@ -104,7 +123,10 @@ int ReshapeOpenCLKernel::PreProcess() { if (type() == PrimitiveType_Reshape && !InferShapeDone()) { auto shape_tensor = in_tensors_[1]; if (!shape_tensor->IsConst()) { - ocl_runtime_->SyncCommandQueue(); + if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + return RET_ERROR; + } shape_tensor->MutableData(); } } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h index 149e50ab96c..7b9025b5866 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h @@ -30,7 +30,7 @@ class ReshapeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int PreProcess() override; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc index 8d4156db470..cf91a167f4f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc @@ -64,7 +64,7 @@ int ResizeOpenCLKernel::Prepare() { } kernel_name += "_NHWC4"; std::string source = resize_source; - std::string program_name = "Resize"; + const std::string program_name = "Resize"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -75,7 +75,10 @@ int ResizeOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -87,7 +90,7 @@ float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size) : static_cast(input_size) / static_cast(output_size); } -void ResizeOpenCLKernel::SetConstArgs() { +int ResizeOpenCLKernel::SetConstArgs() { auto in_shape = in_tensors_[0]->shape(); auto out_shape = out_tensors_[0]->shape(); int n = out_shape[0]; @@ -101,9 +104,19 @@ void ResizeOpenCLKernel::SetConstArgs() { cl_int4 out_size = {n, h, w, c4}; cl_float2 scale = {scale_h, scale_w}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ResizeOpenCLKernel::SetGlobalLocal() { @@ -116,9 +129,18 @@ void ResizeOpenCLKernel::SetGlobalLocal() { int ResizeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } @@ -126,7 +148,10 @@ int ResizeOpenCLKernel::PreProcess() { if (type() == PrimitiveType_Resize && !InferShapeDone() && in_tensors_.size() == INPUT_TENSOR_SIZE_2) { auto shape_tensor = in_tensors_[1]; if (!shape_tensor->IsConst()) { - ocl_runtime_->SyncCommandQueue(); + if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + return RET_ERROR; + } shape_tensor->MutableData(); } } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h index 38b5eee6d9e..ea73e0b10a7 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h @@ -31,7 +31,7 @@ class ResizeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int PreProcess() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index f298fff5958..14c83e0a780 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -98,14 +98,30 @@ int ScaleOpenCLKernel::InitWeights() { img_size.height = 1; img_size.width = UP_DIV(scale_tensor->shape()[0], C4NUM); scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); + if (scale_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); + if (offset_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } return RET_OK; } if (in_tensor->format() == scale_tensor->format()) { if (in_tensor->data_type() == scale_tensor->data_type()) { scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); + if (scale_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); + if (offset_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " << in_tensor->data_type(); @@ -121,7 +137,15 @@ int ScaleOpenCLKernel::InitWeights() { PackNHWCToNHWC4(scale_tensor->data_c(), scale.data(), src_is_fp16, fp16_enable, image2d_info); PackNHWCToNHWC4(offset_tensor->data_c(), offset.data(), src_is_fp16, fp16_enable, image2d_info); scale_ptr_ = 
allocator->Malloc(img_size, scale.data()); + if (scale_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ptr_ = allocator->Malloc(img_size, offset.data()); + if (offset_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " << in_tensor->data_type(); @@ -175,7 +199,7 @@ int ScaleOpenCLKernel::Prepare() { } else { kernel_name += "_BUF"; } - std::string program_name = "Scale"; + const std::string program_name = "Scale"; std::string source = GetActDefines() + scale_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -193,44 +217,86 @@ int ScaleOpenCLKernel::Prepare() { return RET_OK; } -int ScaleOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; - auto *param = reinterpret_cast(op_parameter_); +int ScaleOpenCLKernel::SetKernelArg(int *idx) { int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + return RET_ERROR; + } if (weight_vector_flag_) { void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; void *offset = offset_ptr_ == nullptr ? 
in_tensors_[2]->data_c() : offset_ptr_; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) { + return RET_ERROR; + } } else { if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { float scale = static_cast(in_tensors_[1]->data_c())[0]; float offset = static_cast(in_tensors_[2]->data_c())[0]; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) { + return RET_ERROR; + } } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { float16_t scale = static_cast(in_tensors_[1]->data_c())[0]; float16_t offset = static_cast(in_tensors_[2]->data_c())[0]; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)) != CL_SUCCESS) { + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)) != CL_SUCCESS) { + return RET_ERROR; + } } else { MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[1]->data_type(); return RET_ERROR; } } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + return RET_ERROR; + } cl_int2 output_shape{static_cast(global_size_[0]), static_cast(global_size_[1])}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + return RET_ERROR; + } + *idx = arg_idx; + return 
RET_OK; +} + +int ScaleOpenCLKernel::Run() { + MS_LOG(DEBUG) << this->name() << " Running!"; + auto *param = reinterpret_cast(op_parameter_); + int arg_idx = 0; + + if (SetKernelArg(&arg_idx) != RET_OK) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (weight_vector_flag_ && broadcast_flag_) { if (broadcast_H_flag_) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index 755bdc1db28..f1abc693ff7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -34,7 +34,7 @@ class ScaleOpenCLKernel : public OpenCLKernel { private: void Image2dGetWorkGroupSize(); - + int SetKernelArg(int *idx); bool weight_vector_flag_{true}; bool broadcast_flag_{false}; bool broadcast_H_flag_{false}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 2491f59036c..9f8fb994a90 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -75,7 +75,7 @@ int SoftmaxOpenCLKernel::Prepare() { kernel_name += "Axis" + std::to_string(axis_); } kernel_name += "_NHWC4"; - std::string program_name = "Softmax"; + const std::string program_name = "Softmax"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -93,7 +93,10 @@ int SoftmaxOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return lite::RET_OK; @@ -131,24 +134,40 @@ int SoftmaxOpenCLKernel::Tune() { return OpenCLKernel::Tune(); } -void SoftmaxOpenCLKernel::SetConstArgs() { +int SoftmaxOpenCLKernel::SetConstArgs() { int arg_idx = 2; int channel = out_shape_.C; int c4 = out_shape_.Slice; auto mask_ = GetMaskForLastChannel(channel); cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } cl_int4 input_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), c4}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int SoftmaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 
out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h index da0b75b29e0..504e1e8715f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h @@ -30,7 +30,7 @@ class SoftmaxOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc index 6b6da404602..09f6cc70871 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc @@ -61,7 +61,7 @@ int SpaceToBatchNDOpenCLKernel::CheckSpecs() { return RET_OK; } -void SpaceToBatchNDOpenCLKernel::SetConstArgs() { +int SpaceToBatchNDOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(this->op_parameter_); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); @@ -71,10 +71,23 @@ void SpaceToBatchNDOpenCLKernel::SetConstArgs() { cl_int4 paddings = {param->paddings_[0], param->paddings_[1], 
param->paddings_[2], param->paddings_[3]}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { @@ -87,9 +100,9 @@ void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { } int SpaceToBatchNDOpenCLKernel::Prepare() { - std::string kernel_name = "space_to_batch_nd_NHWC4"; + const std::string kernel_name = "space_to_batch_nd_NHWC4"; std::string source = space_to_batch_nd_source; - std::string program_name = "space_to_batch_nd"; + const std::string program_name = "space_to_batch_nd"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -101,7 +114,10 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -109,9 +125,18 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { int SpaceToBatchNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h index 30df823c059..e545c68b2a4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h @@ -32,7 +32,7 @@ class SpaceToBatchNDOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc index 0303ea31bdb..0e69cd3ef23 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc @@ -51,7 +51,7 @@ int SpaceToDepthOpenCLKernel::Prepare() { kernel_name += "Align"; } std::string source = space_to_depth_source; - std::string program_name = "SpaceToDepth"; + const std::string program_name = "SpaceToDepth"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -63,28 +63,47 @@ int 
SpaceToDepthOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void SpaceToDepthOpenCLKernel::SetConstArgs() { +int SpaceToDepthOpenCLKernel::SetConstArgs() { cl_int4 cl_in_shape = {static_cast(in_shape_.N), static_cast(in_shape_.H), static_cast(in_shape_.W), static_cast(in_shape_.Slice)}; cl_int4 cl_out_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), static_cast(out_shape_.Slice)}; auto param = reinterpret_cast(op_parameter_); int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (type() == PrimitiveType_DepthToSpace) { int co_size = out_shape_.C; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { int ci_size = in_shape_.C; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void SpaceToDepthOpenCLKernel::SetGlobalLocal() { local_size_ = {}; @@ -95,9 
+114,18 @@ void SpaceToDepthOpenCLKernel::SetGlobalLocal() { int SpaceToDepthOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h index 3576e26d616..75ee5d1d1b6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h @@ -32,7 +32,7 @@ class SpaceToDepthOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc index f3f6c8c084f..dc532bbbb92 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc @@ -37,7 +37,10 @@ int SparseToDenseOpenCLKernel::InitOutputToDefault() { cl_float4 fill_value = {}; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; auto 
src_data = out_tensors_[0]->data_c(); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -62,7 +65,14 @@ int SparseToDenseOpenCLKernel::InitWeights() { auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float); size_t weight_size = UP_ROUND(size, C4NUM) * sizeof_FLT; weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); + if (weight_vector_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(weight_vector_, 0x00, weight_size); if (weight_tensor->data_type() == kNumberTypeFloat16) { if (enable_fp16_) { @@ -85,7 +95,10 @@ int SparseToDenseOpenCLKernel::InitWeights() { memcpy(weight_vector_, weight_tensor->data_c(), size * sizeof_FLT); } } - allocator->UnmapBuffer(weight_vector_); + if (allocator->UnmapBuffer(weight_vector_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } return RET_OK; } @@ -115,7 +128,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() { return RET_OK; } -void SparseToDenseOpenCLKernel::SetConstArgs() { +int SparseToDenseOpenCLKernel::SetConstArgs() { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = enable_fp16_ ? 
sizeof(cl_half) : sizeof(cl_float); @@ -124,11 +137,27 @@ void SparseToDenseOpenCLKernel::SetConstArgs() { auto out_shape_temp = out_tensors_[0]->shape(); cl_int4 out_shape = {out_n_, out_h_, out_w_, UP_DIV(out_c_, C4NUM)}; int arg_cn = 3; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void SparseToDenseOpenCLKernel::SetGlobalLocal() { @@ -144,9 +173,9 @@ int SparseToDenseOpenCLKernel::Prepare() { input_dim_ = in_tensors_[0]->shape().size(); inshapeindex1_dim = in_tensors_[0]->shape()[1]; weight_scalar_ = in_tensors_[2]->IsScalar(); - std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector"); + const std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? 
"Scalar" : "Vector"); std::string source = sparse_to_dense_source; - std::string program_name = "SparseToDense"; + const std::string program_name = "SparseToDense"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -174,7 +203,10 @@ int SparseToDenseOpenCLKernel::Prepare() { InitWeights(); InferShapeTo4D(); SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -212,14 +244,30 @@ int SparseToDenseOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; InitOutputToDefault(); int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - if (!weight_scalar_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF); - } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (!weight_scalar_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } else { + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - 
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h index 0ffc6359f98..f98dc6f0265 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h @@ -31,7 +31,7 @@ class SparseToDenseOpenCLKernel : public OpenCLKernel { int Prepare() override; int Run() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int CheckSpecs() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc index 862d4f2dba1..206bbffbf33 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc @@ -41,7 +41,10 @@ int SplitOpenCLKernel::RunAxis0() { for (int i = 0; i < out_tensors_.size(); i++) { auto dst_data = out_tensors_[i]->data_c(); ImageSize img_size; - allocator_->GetImageSize(dst_data, &img_size); + if (allocator_->GetImageSize(dst_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto dst_area = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); @@ -93,23 +96,32 @@ int SplitOpenCLKernel::CheckSpecs() { return RET_OK; } -void SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector &in_shape) { +int SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector &in_shape) { auto allocator = ocl_runtime_->GetAllocator(); int shape_dim = in_shape.at(param->split_dim_); if (num_split_ == 1) { size_t num_split = UP_DIV(shape_dim, param->split_sizes_[0]); split_sizes_ = 
reinterpret_cast(allocator->Malloc(num_split * sizeof(int), lite::opencl::MemType::BUF)); + if (split_sizes_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } for (int i = 0; i < num_split - 1; ++i) { split_sizes_[i] = (i + 1) * param->split_sizes_[0]; } } else { int sum = 0; split_sizes_ = reinterpret_cast(allocator->Malloc(num_split_ * sizeof(int), lite::opencl::MemType::BUF)); + if (split_sizes_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } for (int i = 0; i < num_split_ - 1; ++i) { sum += param->split_sizes_[i]; split_sizes_[i] = sum; } } + return RET_OK; } int SplitOpenCLKernel::Prepare() { @@ -129,7 +141,10 @@ int SplitOpenCLKernel::Prepare() { } } } - AlignSplitSizes(param, in_shape); + if (AlignSplitSizes(param, in_shape) != RET_OK) { + MS_LOG(ERROR) << "AlignSplitSizes failed."; + return RET_ERROR; + } std::string kernel_name = "split_out"; kernel_name += std::to_string(num_split_); kernel_name += "_axis" + std::to_string(split_dim_); @@ -138,7 +153,7 @@ int SplitOpenCLKernel::Prepare() { } MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = split_source; - std::string program_name = "split"; + const std::string program_name = "split"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -151,12 +166,15 @@ int SplitOpenCLKernel::Prepare() { return ret; } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } -void SplitOpenCLKernel::SetConstArgs() { +int SplitOpenCLKernel::SetConstArgs() { int arg_cn = out_tensors_.size() + 2; cl_int4 shape = {}; for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { @@ -166,7 +184,10 @@ void SplitOpenCLKernel::SetConstArgs() { if (Align_) { in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, 
in_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } for (int i = 0; i < out_tensors_.size(); ++i) { cl_int4 temp = {}; @@ -177,13 +198,21 @@ void SplitOpenCLKernel::SetConstArgs() { if (Align_) { out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } - GpuTensorInfo img_info(in_tensors_.at(0)); - size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); - stride_w = img_info.RowPitch() / dtype; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); - return; + if (!Align_) { + GpuTensorInfo img_info(in_tensors_.at(0)); + size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); + stride_w = img_info.RowPitch() / dtype; + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + return RET_OK; } void SplitOpenCLKernel::SetGlobalLocal() { @@ -205,15 +234,31 @@ int SplitOpenCLKernel::Run() { } int arg_cn = 0; if (Align_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } for (int i = 0; i < out_tensors_.size(); ++i) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, 
out_tensors_.at(i)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h index c8be6a244da..b7e25a93996 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h @@ -31,12 +31,12 @@ class SplitOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; private: - void AlignSplitSizes(SplitParameter *param, const std::vector &in_shape); + int AlignSplitSizes(SplitParameter *param, const std::vector &in_shape); int RunAxis0(); private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc index 819c2ab8b7c..5b08fbb3245 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc @@ -36,7 +36,10 @@ int StackOpenCLKernel::RunAxis0() { cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { auto src_data = in_tensors_[i]->data_c(); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) 
<< "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *input_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -95,7 +98,7 @@ int StackOpenCLKernel::CheckSpecs() { return RET_OK; } -void StackOpenCLKernel::SetConstArgs() { +int StackOpenCLKernel::SetConstArgs() { int arg_cn = in_tensors_.size() + 1; cl_int4 inshape_tmp = {}, outshape_tmp = {}; for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { @@ -108,8 +111,14 @@ void StackOpenCLKernel::SetConstArgs() { Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (buffer_button_) { GpuTensorInfo img_info_out(out_tensors_[0]); GpuTensorInfo img_info_in(in_tensors_[0]); @@ -117,8 +126,12 @@ void StackOpenCLKernel::SetConstArgs() { stride_w_out = img_info_out.RowPitch() / dtype; stride_w_in = img_info_in.RowPitch() / dtype; cl_int2 stride_w = {stride_w_out, stride_w_in}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void StackOpenCLKernel::SetGlobalLocal() { @@ -162,7 +175,7 @@ int StackOpenCLKernel::Prepare() { MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = stack_source; - std::string program_name = "stack"; + const std::string program_name = "stack"; if 
(!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -174,7 +187,10 @@ int StackOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; @@ -188,16 +204,33 @@ int StackOpenCLKernel::Run() { int arg_cn = 0; if (buffer_button_) { for (int i = 0; i < in_tensors_.size(); ++i) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } else { for (int i = 0; i < in_tensors_.size(); ++i) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator); diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h index a41bc0ff7ee..1585fae341d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h @@ -29,7 +29,7 @@ class StackOpenCLKernel : public OpenCLKernel { ~StackOpenCLKernel() override{}; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc index 59df111e2a8..bd21ab17886 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc @@ -27,9 +27,9 @@ using mindspore::lite::opencl::ImageSize; namespace mindspore::kernel { int StrassenOpenCLKernel::Prepare() { - std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; + const std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; std::string source = strassen_source; - std::string program_name = "MatMul"; + const std::string program_name = "MatMul"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -50,13 +50,16 @@ int StrassenOpenCLKernel::Prepare() { if (ret != RET_OK) { return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { +int StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { auto allocator = ocl_runtime_->GetAllocator(); size_t img_dtype = enable_fp16_ ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{static_cast(UP_DIV(NumA, C4NUM)), static_cast(NumA), img_dtype}; @@ -64,15 +67,52 @@ void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { size_t memB = NumB * NumB * dtype_size; for (int depth = 0; depth < MAXDEPTH; depth++) { B_temp[depth] = allocator->Malloc(memB, lite::opencl::MemType::BUF); + if (B_temp[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } A_temp[depth] = allocator->Malloc(img_size); + if (A_temp[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M1[depth] = allocator->Malloc(img_size); + if (M1[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M2[depth] = allocator->Malloc(img_size); + if (M2[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M3[depth] = allocator->Malloc(img_size); + if (M3[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M4[depth] = allocator->Malloc(img_size); + if (M4[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M5[depth] = allocator->Malloc(img_size); + if (M5[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M6[depth] = allocator->Malloc(img_size); + if (M6[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M7[depth] = allocator->Malloc(img_size); + if (M7[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } + return RET_OK; } int StrassenOpenCLKernel::InitWeights() { @@ -82,14 +122,25 @@ int StrassenOpenCLKernel::InitWeights() { int NumB = in_tensors_[1]->shape()[0]; size_t dtype_size = enable_fp16_ ? 
sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(NumA * NumB * dtype_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, NumA * NumB * dtype_size); auto originWeightFp32 = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); auto originWeightFp16 = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; - AllocatorMemoryForStrassen(NumA / 2, NumB / 2); + if (AllocatorMemoryForStrassen(NumA / 2, NumB / 2) != RET_OK) { + MS_LOG(ERROR) << "AllocatorMemoryForStrassen failed."; + return RET_ERROR; + } size_t size = NumA * NumB * dtype_size; if (isModelFp16) { if (enable_fp16_) { @@ -108,7 +159,10 @@ int StrassenOpenCLKernel::InitWeights() { memcpy(padWeightFp32, originWeightFp32, size); } } - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } @@ -120,7 +174,7 @@ void AlignStrassenGlobalLocal(const std::vector &global, const std::vect } // 0 : global_size_, 1: global_size_add_sub -void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { +int StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { size_t strassen_size_C4 = UP_DIV(strassen_size, C4NUM); local_size_add_sub = {16, 1, 16}; if (type_flag == 0) { @@ -130,6 +184,7 @@ void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type global_size_add_sub = {strassen_size_C4, 1, strassen_size}; AlignStrassenGlobalLocal(global_size_add_sub, 
local_size_add_sub, &global_add_sub_, &local_add_sub_); } + return RET_OK; } void StrassenOpenCLKernel::SetGlobalLocal() { @@ -142,111 +197,188 @@ void StrassenOpenCLKernel::SetGlobalLocal() { StrassenSetGlobalLocal(strassen_size, 2); // set global_size_weights } -void StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, - bool is_matmul_kernel) { +int StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, + bool is_matmul_kernel) { cl_int4 shape; if (is_matmul_kernel) { shape = {1, 1, strassen_size, strassen_size}; } else { shape = {strassen_size, 1, 1, UP_DIV(strassen_size, C4NUM)}; } - ocl_runtime_->SetKernelArg(*kernel, index, shape); + if (ocl_runtime_->SetKernelArg(*kernel, index, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::SetConstArgs() { - int arg_count = 2; - cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; - cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; - cl_int4 shape_offset = {0, 0, 0, 0}; +int StrassenOpenCLKernel::SetConstArgs() { int strassen_size = inShape[3] / 2; - out_shape.s[2] = in_shape.s[2] = in_shape.s[2] / 2; - out_shape.s[3] = in_shape.s[3] = in_shape.s[3] / 2; StrassenSetConstArgs(&kernel_IMG_add_sub_2, 3, strassen_size, false); StrassenSetConstArgs(&kernel_BUF_add_sub_2, 2, strassen_size, false); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, shape_offset); + return RET_OK; } -void StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, - cl_int2 offset, lite::opencl::MemType mem_type) { +int StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, + cl_int2 offset, lite::opencl::MemType mem_type) { if 
(input == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenDataFilled input or output can not nullptr"; - return; + return RET_ERROR; } if (mem_type == lite::opencl::MemType::IMG) { - ocl_runtime_->SetKernelArg(*kernel, 0, input); - ocl_runtime_->SetKernelArg(*kernel, 1, output); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } StrassenSetConstArgs(kernel, 2, size, false); - ocl_runtime_->SetKernelArg(*kernel, 3, offset); - ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, - int flag, lite::opencl::MemType mem_type) { +int StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, + int flag, lite::opencl::MemType mem_type) { if (input == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenAddSub input or output can 
not nullptr"; - return; + return RET_ERROR; } if (mem_type == lite::opencl::MemType::IMG) { - ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } StrassenSetConstArgs(kernel, 2, size, false); - ocl_runtime_->SetKernelArg(*kernel, 3, offset); - ocl_runtime_->SetKernelArg(*kernel, 4, flag); - ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 4, flag) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, - void *input4, void *input5, void *input6, void *input7, void *output, - const int size) { +int 
StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, + void *input5, void *input6, void *input7, void *output, const int size) { if (input1 == nullptr || input2 == nullptr || input3 == nullptr || input4 == nullptr || input5 == nullptr || input6 == nullptr || input7 == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenBackResult input or output can not nullptr"; - return; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 0, input1) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, input2) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 2, input3) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 3, input4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 4, input5) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 5, input6) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 6, input7) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 7, output) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(*kernel, 0, input1); - ocl_runtime_->SetKernelArg(*kernel, 1, input2); - ocl_runtime_->SetKernelArg(*kernel, 2, input3); - ocl_runtime_->SetKernelArg(*kernel, 3, input4); - ocl_runtime_->SetKernelArg(*kernel, 4, input5); - ocl_runtime_->SetKernelArg(*kernel, 5, input6); - ocl_runtime_->SetKernelArg(*kernel, 6, input7); - ocl_runtime_->SetKernelArg(*kernel, 7, output); StrassenSetConstArgs(kernel, 8, 
size, false); - ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); + if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { +int StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { if (input == nullptr || weight == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenRunMmatmul input ,weight or output can not nullptr"; - return; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 0, input) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, output) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, 0, input); - ocl_runtime_->SetKernelArg(kernel_, 1, output); - ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF); StrassenSetConstArgs(&kernel_, 3, size, true); StrassenSetConstArgs(&kernel_, 4, size, true); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, - const int threshold) { +int StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, + const int threshold) { const int size_2 = size / 2; int C4 = UP_DIV(size_2, C4NUM); if (size <= 
threshold) { // run matmul; StrassenSetGlobalLocal(size, 0); StrassenRunMmatmul(data, weight, result, size); - return; + return RET_OK; } // flag = 0 : add otherwise flag = 1 : sub // M1 = A11 * ( B12- B22) @@ -307,6 +439,7 @@ void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, co StrassenSetGlobalLocal(size_2, 1); StrassenBackResult(&kernel_back_result, M1[depth + 1], M2[depth + 1], M3[depth + 1], M4[depth + 1], M5[depth + 1], M6[depth + 1], M7[depth + 1], result, size_2); + return RET_OK; } int StrassenOpenCLKernel::Run() { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h index 808cddd6d18..48596a3ebd2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h @@ -33,22 +33,22 @@ class StrassenOpenCLKernel : public MatMulOpenCLKernel { int Run() override; int Prepare() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; // strassen private: - void AllocatorMemoryForStrassen(int NumA, int NumB); - void DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold); - void StrassenSetGlobalLocal(size_t strassen_size, int type_flag); - void StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); - void StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, - lite::opencl::MemType mem_type); - void StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, - lite::opencl::MemType mem_type); - void StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, - void *input6, void *input7, void *output, const int size); - void StrassenRunMmatmul(void *input, void *weight, void *output, const 
int size); + int AllocatorMemoryForStrassen(int NumA, int NumB); + int DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold); + int StrassenSetGlobalLocal(size_t strassen_size, int type_flag); + int StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); + int StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, + lite::opencl::MemType mem_type); + int StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, + lite::opencl::MemType mem_type); + int StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, + void *input6, void *input7, void *output, const int size); + int StrassenRunMmatmul(void *input, void *weight, void *output, const int size); cl::Kernel kernel_IMG_add_sub_2; cl::Kernel MatMul_StrassenBUFFilled; cl::Kernel MatMul_StrassenIMGFilled; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc index b1d7fa9b762..9d00ac7a4dd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -85,7 +85,7 @@ int StridedSliceOpenCLKernel::CheckSpecs() { } int StridedSliceOpenCLKernel::Prepare() { - std::string program_name = "strided_slice"; + const std::string program_name = "strided_slice"; if (!ocl_runtime_->LoadSource(program_name, strided_slice_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -96,7 +96,10 @@ int StridedSliceOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } @@ -187,14 +190,33 @@ int StridedSliceOpenCLKernel::InitConstArgs() { return 
RET_OK; } -void StridedSliceOpenCLKernel::SetConstArgs() { +int StridedSliceOpenCLKernel::SetConstArgs() { int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void StridedSliceOpenCLKernel::SetGlobalLocal() { @@ -214,9 +236,18 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() { int StridedSliceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h index 87e2638dc49..3ce6b991ee5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h @@ -31,7 +31,7 @@ class StridedSliceOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 5380f461462..0d6ff88d36d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -42,11 +42,18 @@ int ToFormatOpenCLKernel::CheckSpecs() { return RET_OK; } -void ToFormatOpenCLKernel::SetConstArgs() { +int ToFormatOpenCLKernel::SetConstArgs() { cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_}; cl_int4 gsize{(cl_int)(N_ * H_), (cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1}; - ocl_runtime_->SetKernelArg(kernel_, 2, gsize); - ocl_runtime_->SetKernelArg(kernel_, 3, 
shape); + if (ocl_runtime_->SetKernelArg(kernel_, 2, gsize) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 3, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ToFormatOpenCLKernel::SetGlobalLocal() { @@ -70,7 +77,7 @@ int ToFormatOpenCLKernel::Prepare() { kernel_name += dtype_str[in_tensor->data_type()] + "_" + dtype_str[out_tensor->data_type()]; this->set_name(kernel_name); - std::string program_name = "to_format"; + const std::string program_name = "to_format"; std::string source = to_format_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -89,7 +96,10 @@ int ToFormatOpenCLKernel::Prepare() { C_ = output.C; SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -98,9 +108,18 @@ int ToFormatOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; auto src_mem_type = (out_mem_type_ == MemType::IMG) ? 
lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto dst_mem_type = out_mem_type_; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h index d600519e3c4..0e1989d157f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h @@ -35,7 +35,7 @@ class ToFormatOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int InferShape() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 6841867de66..9c7cbea7c29 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -101,7 +101,7 @@ int TransposeOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; std::string source = transpose_source; - std::string program_name = "transpose"; + const std::string program_name = "transpose"; if (!ocl_runtime_->LoadSource(program_name, 
source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -113,32 +113,45 @@ int TransposeOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void TransposeOpenCLKernel::SetConstArgs() { +int TransposeOpenCLKernel::SetConstArgs() { size_t n = tensor_size_.N; size_t h = tensor_size_.H; size_t w = tensor_size_.W; size_t c = tensor_size_.C; int arg_idx = 2; cl_int4 shape = {static_cast(n), static_cast(h), static_cast(w), static_cast(c)}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (type_ == TransposeType::GENERAL) { int de_perm[4]; // output to input perm for (int i = 0; i < 4; i++) { de_perm[perm_4d_[i]] = i; } cl_int4 de_perm_cl = {de_perm[0], de_perm[1], de_perm[2], de_perm[3]}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } GpuTensorInfo in_shape = GpuTensorInfo(in_tensors_[0]); cl_int4 in_shape_int4 = {static_cast(in_shape.N), static_cast(in_shape.H), static_cast(in_shape.W), static_cast(in_shape.C)}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void TransposeOpenCLKernel::SetGlobalLocal() { @@ -161,9 +174,18 @@ void TransposeOpenCLKernel::SetGlobalLocal() { int TransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 
in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h index 54edb3fd011..5daaf10cd35 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h @@ -33,7 +33,7 @@ class TransposeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc index 7b52015c617..8e51bcaaaed 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc @@ -78,7 +78,7 @@ std::vector GenerateWinogradFilter(void *src, TypeId dtype, size_t CO, si } // namespace int WinogradOpenCLKernel::BuildKernel() { - std::string program_name = "winograd"; + const std::string program_name = "winograd"; if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -103,7 +103,7 @@ int WinogradOpenCLKernel::BuildKernel() { return 
RET_OK; } -void WinogradOpenCLKernel::InitFilter() { +int WinogradOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate opencl memory: buffer or image2d @@ -115,9 +115,17 @@ void WinogradOpenCLKernel::InitFilter() { size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size = width * height * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc({width, height, dtype}); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { size = UP_DIV(CO_SLICES_, Ogroup) * 6 * 6 * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc(size, MemType::BUF); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } // rearrange filter @@ -128,6 +136,10 @@ void WinogradOpenCLKernel::InitFilter() { void *src_data = winograd_filter.data(); #else auto winograd_filter = std::make_unique(CO_ * 6 * 6 * CI_); + if (winograd_filter == nullptr) { + MS_LOG(ERROR) << "new winograd_filter failed."; + return RET_ERROR; + } WinogradWeightTransform(reinterpret_cast(src_filter_data), reinterpret_cast(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false); @@ -147,53 +159,121 @@ void WinogradOpenCLKernel::InitFilter() { if (filter_type_ == MemType::IMG) { ocl_runtime_->WriteImage(packed_filter_, tmp.data()); } else { - allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memcpy(packed_filter_, tmp.data(), size); - allocator->UnmapBuffer(packed_filter_); + if (allocator->UnmapBuffer(packed_filter_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } FreeStoredData(stored_filter_); + return RET_OK; } -void WinogradOpenCLKernel::AllocateMemory() { +int WinogradOpenCLKernel::AllocateMemory() { auto allocator = ocl_runtime_->GetAllocator(); size_t 
img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size_t width = TILE_HW_; size_t height = CI_SLICES_ * 36; winograd_mem0_ = allocator->Malloc({width, height, img_dtype}); + if (winograd_mem0_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } width = TILE_HW_; height = CO_SLICES_ * 36; winograd_mem1_ = allocator->Malloc({width, height, img_dtype}); + if (winograd_mem1_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + return RET_OK; } -void WinogradOpenCLKernel::SetConstArgs() { +int WinogradOpenCLKernel::SetConstArgs() { AllocateMemory(); int arg_cn = 1; cl_int4 input_shape = {batch_size_, OH_, OW_, CI_SLICES_}; // maybe pad=0, so use OH/OW - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_); + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_); - 
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } arg_cn = 2; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; - ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_); + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void WinogradOpenCLKernel::SetGlobalLocal() { @@ -205,15 +285,30 @@ void WinogradOpenCLKernel::SetGlobalLocal() { int WinogradOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " winograd Running!"; MS_LOG(DEBUG) << "winograd kernel0 Running!"; - ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << "winograd kernel1 Running!"; - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_); + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << "winograd kernel2 Running!"; - ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_); + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, 
out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h index 7ed7050a2d0..9f3da53f780 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h @@ -32,7 +32,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { ~WinogradOpenCLKernel() override = default; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; @@ -42,8 +42,8 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { private: int BuildKernel() override; - void InitFilter() override; - void AllocateMemory(); + int InitFilter() override; + int AllocateMemory(); cl::Kernel kernel_4x4to36_; cl::Kernel kernel_36to4x4_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc index 78e6a6842da..01f19b810d3 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc @@ -24,7 +24,7 @@ using mindspore::lite::RET_OK; using mindspore::lite::opencl::ImageSize; namespace mindspore::kernel { -int OpenCLKernel::AlignGlobalLocal(const std::vector &global, const std::vector &local) { +void OpenCLKernel::AlignGlobalLocal(const std::vector &global, const std::vector &local) { std::vector internal_global_ws = global; for (size_t i = 0; i < local.size(); ++i) { internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i)); @@ -50,16 +50,12 @@ int OpenCLKernel::AlignGlobalLocal(const std::vector &global, const std: if (!local.empty()) { 
local_range_ = cl::NDRange(local.at(0), local.at(1)); } - } else if (global.size() == 3) { + } else if (global.size() >= 3) { global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2)); if (!local.empty()) { local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2)); } - } else { - MS_LOG(ERROR) << "Not supported NDRange!"; - return RET_ERROR; } - return RET_OK; } int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) { @@ -112,11 +108,17 @@ void OpenCLKernel::PrintOutput(int print_num, const std::string &out_file) { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); auto runtime = runtime_wrapper.GetInstance(); auto allocator = runtime->GetAllocator(); - runtime->SyncCommandQueue(); + if (!runtime->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + } if (mem_type == lite::opencl::MemType::BUF) { - allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true); + if (allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + } memcpy(data.data(), tensor->data_c(), img_info.OriginSize); - allocator->UnmapBuffer(tensor->data_c()); + if (allocator->UnmapBuffer(tensor->data_c()) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + } } else { runtime->ReadImage(tensor->data_c(), data.data()); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 24f10a7aa16..4e17512a38d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -185,7 +185,7 @@ class OpenCLKernel : public InnerKernel { ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); } ~OpenCLKernel() override = default; - int AlignGlobalLocal(const std::vector &global, const std::vector &local); + void AlignGlobalLocal(const std::vector &global, const std::vector &local); int Prepare() 
override { return RET_OK; } int PreProcess() override; @@ -194,7 +194,7 @@ class OpenCLKernel : public InnerKernel { virtual int CheckSpecs(); virtual int InitWeights() { return RET_OK; } - virtual void SetConstArgs() {} + virtual int SetConstArgs() { return RET_OK; } virtual void SetGlobalLocal() {} virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return RET_ERROR; } virtual int GetLocalSize(size_t idx, const std::vector &global_size, std::vector *local_size) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index 957d89a77db..e1c52e51949 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -420,6 +420,7 @@ int OpenCLSubGraph::Execute() { return ret; } if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; return RET_ERROR; } return RET_OK; @@ -449,6 +450,7 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack & return ret; } if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; return RET_ERROR; } return RET_OK;