!6380 Fixed OpenCL program getting stuck on some devices

Merge pull request !6380 from liuchao/master
This commit is contained in:
mindspore-ci-bot 2020-09-17 10:14:59 +08:00 committed by Gitee
commit 2db8560a14
57 changed files with 306 additions and 414 deletions

View File

@ -55,8 +55,7 @@ int ActivationOpenClKernel::Init() {
c = in_tensors_[0]->shape()[3];
}
nhwc_shape_ = {n, h, w, c};
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
if (in_size_ != 2 && in_size_ != 4) {
MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_;
@ -75,9 +74,9 @@ int ActivationOpenClKernel::Init() {
std::string source = activation_source;
std::set<std::string> build_options;
ocl_runtime->LoadSource(Program_Kernel[type_][0], source);
ocl_runtime_->LoadSource(Program_Kernel[type_][0], source);
std::string kernel_name = Program_Kernel[type_][1];
ocl_runtime->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
@ -89,17 +88,16 @@ int ActivationOpenClKernel::Init() {
int ActivationOpenClKernel::Run() {
MS_LOG(DEBUG) << op_parameter_->name_ << " begin running!";
cl_int4 img2d_shape = GetImg2dShape();
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, img2d_shape);
if (type_ == ActivationType_LEAKY_RELU) {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_);
}
std::vector<size_t> local = {};
std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])};
auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel:" << op_parameter_->name_ << " fail.";
return RET_ERROR;

View File

@ -19,7 +19,6 @@
#include <vector>
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/activation.h"

View File

@ -34,7 +34,7 @@ namespace mindspore::kernel {
ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() {
if (weight_ptr_ != nullptr) {
auto allocator = runtime_->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
allocator->Free(weight_ptr_);
weight_ptr_ = nullptr;
}
@ -106,7 +106,7 @@ int ArithmeticOpenCLKernel::InitBuffer() {
const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
if (!arithmetic_parameter->broadcasting_) {
if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) {
auto allocator = runtime_->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
GetImageSize(0, &img_size);
int pack_weight_size = in_tensors_[1]->ElementsC4Num();
@ -194,7 +194,6 @@ int ArithmeticOpenCLKernel::InitBuffer() {
}
int ArithmeticOpenCLKernel::Init() {
runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name;
const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
@ -265,7 +264,7 @@ int ArithmeticOpenCLKernel::Init() {
lite::STATUS error_code = RET_OK;
#ifdef PROGRAM_WITH_IL
kernel_ = runtime_->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
if (out_mem_type_ == OpenCLMemType::IMG) {
kernel_name += "_IMG";
@ -275,8 +274,8 @@ int ArithmeticOpenCLKernel::Init() {
std::string program_name = "Arithmetic";
std::set<std::string> build_options;
std::string source = arithmetic_source;
runtime_->LoadSource(program_name, source);
error_code = runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
if (error_code != RET_OK) {
return error_code;
@ -302,10 +301,10 @@ int ArithmeticOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
int arg_idx = 0;
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (element_flag_) {
void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_;
runtime_->SetKernelArg(kernel_, arg_idx++, weight);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight);
} else {
float weight = 0.f;
if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
@ -316,9 +315,9 @@ int ArithmeticOpenCLKernel::Run() {
MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type();
return RET_ERROR;
}
runtime_->SetKernelArg(kernel_, arg_idx++, weight);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight);
}
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
int H = 0;
int W = 0;
@ -336,8 +335,8 @@ int ArithmeticOpenCLKernel::Run() {
return RET_ERROR;
}
cl_int2 output_shape{W, H};
runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
return RET_OK;
}

View File

@ -19,7 +19,6 @@
#include <vector>
#include "src/runtime/kernel/arm/fp32/arithmetic.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::kernel {
@ -42,7 +41,6 @@ class ArithmeticOpenCLKernel : public OpenCLKernel {
int InitBuffer();
cl::Kernel kernel_;
lite::opencl::OpenCLRuntime *runtime_;
bool element_flag_{true};
void *weight_ptr_{nullptr};

View File

@ -17,7 +17,6 @@
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/arithmetic_self.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/arithmeticself.cl.inc"
@ -51,8 +50,7 @@ int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *im
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@ -136,9 +134,8 @@ int ArithmeticSelfOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = arithmeticself_source;
std::string program_name = "ArithmeticSelf";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
return RET_OK;
}
@ -162,7 +159,6 @@ void ArithmeticSelfGetWorkGroup(const std::vector<size_t> &global, std::vector<s
int ArithmeticSelfOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto output_shape = out_tensors_[0]->shape();
cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)};
@ -170,17 +166,17 @@ int ArithmeticSelfOpenCLKernel::Run() {
uint32_t OW = output_shape[2];
uint32_t OC = UP_DIV(output_shape[3], C4NUM);
const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {OH, OW, OC};
ArithmeticSelfGetWorkGroup(global, &local, max_global[0]);
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -21,7 +21,6 @@
#include <string>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/arithmetic_self_parameter.h"
namespace mindspore::kernel {

View File

@ -18,7 +18,6 @@
#include <set>
#include <string>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/batchnorm.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/batchnorm.cl.inc"
@ -40,8 +39,7 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@ -72,9 +70,8 @@ int BatchNormOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = batchnorm_source;
std::string program_name = "Batch_normalization";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
return RET_OK;
}
@ -98,7 +95,6 @@ void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
int BatchNormOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto input0_shape = in_tensors_[0]->shape();
auto output_shape = out_tensors_[0]->shape();
cl_int4 input_shape_ = {input0_shape[0], input0_shape[1], input0_shape[2], UP_DIV(input0_shape[3], C4NUM)};
@ -107,20 +103,20 @@ int BatchNormOpenCLKernel::Run() {
uint32_t OW = output_shape[2];
uint32_t OC = UP_DIV(output_shape[3], C4NUM);
const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {OH, OW, OC};
BatchNormGetWorkGroup(global, &local, max_global[0]);
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offset
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offset
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/fp32/batchnorm.h"
namespace mindspore::kernel {

View File

@ -16,6 +16,7 @@
* limitations under the License.
*/
#include "src/runtime/kernel/opencl/kernel/biasadd.h"
#include <string>
#include <map>
#include <set>
@ -23,7 +24,6 @@
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/kernel/opencl/kernel/biasadd.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/biasadd.cl.inc"
@ -38,7 +38,7 @@ namespace mindspore::kernel {
void BiasAddOpenCLKernel::InitBuffer() {
int C = in_tensors_[1]->shape()[0];
int div_ci = UP_DIV(C, C4NUM);
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
@ -57,8 +57,7 @@ int BiasAddOpenCLKernel::Init() {
for (int i = 0; i < in_size_; ++i) {
input_shape_.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i];
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
if (in_size_ != 4 && in_size_ != 2) {
MS_LOG(ERROR) << "BiasAdd only support dim=4 or 2, but your dim=" << in_size_;
@ -75,8 +74,8 @@ int BiasAddOpenCLKernel::Init() {
std::string source = biasadd_source;
std::string program_name = "BiasAdd";
std::string kernel_name = "BiasAdd";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@ -89,18 +88,17 @@ int BiasAddOpenCLKernel::Init() {
int BiasAddOpenCLKernel::Run() {
cl_int4 global_size = GetGlobalshape();
MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_idx = 0;
std::map<schema::Format, int> data_type{
{schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}};
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
std::vector<size_t> local = {1, 1};
std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])};
auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error.";
return RET_ERROR;

View File

@ -23,7 +23,6 @@
#include "src/tensor.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {

View File

@ -13,13 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/opencl/kernel/concat.h"
#include <cstring>
#include <string>
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/concat.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/concat.cl.inc"
@ -40,8 +40,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@ -52,8 +51,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
}
int ConcatOpenCLKernel::RunAxis0() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator_ = ocl_runtime->GetAllocator();
auto allocator_ = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
auto dst_data = out_tensors_[0]->data_c();
auto dst_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
@ -64,7 +62,7 @@ int ConcatOpenCLKernel::RunAxis0() {
auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1};
cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
ocl_runtime->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region);
ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region);
dst_origin[1] += region[1];
}
return RET_OK;
@ -112,9 +110,8 @@ int ConcatOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = concat_source;
std::string program_name = "Concat";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
return RET_OK;
}
@ -155,7 +152,6 @@ int ConcatOpenCLKernel::Run() {
return RunAxis0();
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto input1_shape = in_tensors_[0]->shape();
auto input2_shape = in_tensors_[1]->shape();
auto output_shape = out_tensors_[0]->shape();
@ -168,7 +164,7 @@ int ConcatOpenCLKernel::Run() {
uint32_t OW = output_shape[2];
uint32_t OC = UP_DIV(output_shape[3], C4NUM);
const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {OH, OW, OC};
ConcatGetWorkGroup(global, &local, max_global[0]);
@ -176,48 +172,48 @@ int ConcatOpenCLKernel::Run() {
int arg_cn = 0;
if (in_tensors_.size() == 2) {
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
} else if (in_tensors_.size() == 3) {
auto input3_shape = in_tensors_[2]->shape();
cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)};
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
} else if (in_tensors_.size() == 4) {
auto input3_shape = in_tensors_[2]->shape();
auto input4_shape = in_tensors_[3]->shape();
cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)};
cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)};
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape4_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape4_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
} else {
MS_LOG(ERROR) << " input sizes must 2 or 3 or 4";
return RET_ERROR;
}
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/arm/base/concat_base.h"
namespace mindspore::kernel {

View File

@ -14,12 +14,11 @@
* limitations under the License.
*/
#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h"
#include <string>
#include <set>
#include "nnacl/fp32/common_func.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/conv2d_transpose2x2.cl.inc"
#endif
@ -41,16 +40,15 @@ int Conv2dTransposeOpenCLKernel::Init() {
return RET_ERROR;
}
std::string kernel_name = "conv2d_transpose2x2_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::string source = conv2d_transpose2x2_source;
std::set<std::string> build_options;
std::string program_name = "conv2d_transpose2x2";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
PadWeight();
in_ori_format_ = in_tensors_[0]->GetFormat();
@ -71,7 +69,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
int kw = param->kernel_w_;
int div_ci = UP_DIV(ci, C4NUM);
int div_co = UP_DIV(co, C4NUM);
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
auto data_size = enable_fp16_ ? sizeof(int16_t) : sizeof(float);
// IHWO to OHWI4(I)4(O)(converter format is IHWO)
@ -188,7 +186,6 @@ int Conv2dTransposeOpenCLKernel::Run() {
int ow = out_tensors_[0]->shape()[2];
int h = in_tensors_[0]->shape()[1];
int w = in_tensors_[0]->shape()[2];
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// local size should less than MAX_GROUP_SIZE
std::vector<size_t> local = {16, 1, 16};
std::vector<size_t> global = {UP_ROUND((size_t)UP_ROUND(oh / 2, 2), local[0]),
@ -200,16 +197,16 @@ int Conv2dTransposeOpenCLKernel::Run() {
cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1};
cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1};
int arg_cnt = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -21,7 +21,6 @@
#include "src/lite_kernel.h"
#include "nnacl/conv_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::kernel {

View File

@ -39,12 +39,11 @@ constexpr size_t CO_TILE = C4NUM;
int ConvolutionOpenCLKernel::Init() {
static int init_count = 0;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
auto param = reinterpret_cast<ConvParameter *>(op_parameter_);
std::set<std::string> build_options;
init_count++;
use_fp16_ = ocl_runtime->GetFp16Enable();
use_fp16_ = ocl_runtime_->GetFp16Enable();
if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!";
@ -76,21 +75,21 @@ int ConvolutionOpenCLKernel::Init() {
MS_LOG(DEBUG) << "use winograd";
std::string program_name;
program_name = "Winograd4x4To36" + std::to_string(init_count);
ocl_runtime->LoadSource(program_name, CodeGenWinograd4x4To36());
ocl_runtime->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);
ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36());
ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);
program_name = "WinogradConvolution" + std::to_string(init_count);
ocl_runtime->LoadSource(program_name, CodeGenWinogradConvolution());
ocl_runtime->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);
ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution());
ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);
program_name = "Winograd36To4x4" + std::to_string(init_count);
ocl_runtime->LoadSource(program_name, CodeGenWinograd36To4x4());
ocl_runtime->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4());
ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
} else {
std::string program_name = "convolution" + std::to_string(init_count);
std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
}
// allocate winograd memory
@ -167,7 +166,7 @@ int ConvolutionOpenCLKernel::GenerateWinogradWeight() {
}
int ConvolutionOpenCLKernel::InitWeight() {
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
// allocate memory
size_t packed_weight_size;
@ -205,8 +204,7 @@ int ConvolutionOpenCLKernel::InitWeight() {
}
int ConvolutionOpenCLKernel::InitBias() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
// align bias from C to C4
auto bias_tensor = in_tensors_[2];
@ -272,57 +270,56 @@ int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_s
int ConvolutionOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_cn = 0;
if (use_winograd_) {
arg_cn = 0;
cl_int4 _4x4to36_in_shape = {1, IH_, IW_, CI_SLICES_};
cl_int4 _4x4to36_out_shape = {1, 36, TILES_XY_, CI_SLICES_};
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape);
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape);
arg_cn = 0;
cl_int4 conv_in_shape = {1, 36, TILES_XY_, CI_SLICES_};
cl_int4 conv_out_shape = {1, 36, TILES_XY_, CO_SLICES_};
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape);
arg_cn = 0;
cl_int4 _36to4x4_in_shape = {1, 16, TILES_XY_, CO_SLICES_};
cl_int4 _36to4x4_out_shape = {1, OH_, OW_, CO_SLICES_};
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape);
} else {
arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
if (op_format_ == Format_NC4HW4) {
cl_int4 input_shape = {1, IH_, IW_, CI_SLICES_};
cl_int4 output_shape = {1, OH_, OW_, CO_SLICES_};
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, input_shape);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, output_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, input_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, output_shape);
}
}
if (use_winograd_) {
ocl_runtime->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr);
ocl_runtime->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr);
ocl_runtime->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr);
ocl_runtime_->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr);
ocl_runtime_->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr);
ocl_runtime_->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr);
} else {
std::vector<size_t> global, local;
SetGlobalLocalConv(&global, &local);
ocl_runtime->RunKernel(kernel_conv_, global, local, nullptr);
ocl_runtime_->RunKernel(kernel_conv_, global, local, nullptr);
}
return RET_OK;
@ -819,10 +816,9 @@ std::string ConvolutionOpenCLKernel::CodeGenWinograd36To4x4() {
}
int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector<size_t> *global, std::vector<size_t> *local) {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
constexpr size_t work_group_size[] = {4, 4, 1};
auto max_work_item_sizes = ocl_runtime->GetWorkItemSize();
size_t max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime->Device())());
auto max_work_item_sizes = ocl_runtime_->GetWorkItemSize();
size_t max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime_->Device())());
const size_t max_z_size = std::min<size_t>(16, max_work_item_sizes[2]);
size_t global_h = UP_DIV(OH_, work_group_size[0]) * work_group_size[0];

View File

@ -22,7 +22,6 @@
#include "src/tensor.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::kernel {

View File

@ -21,7 +21,6 @@
#include <map>
#include <utility>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "nnacl/fp32/common_func.h"
#include "nnacl/op_base.h"
@ -42,7 +41,6 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
namespace mindspore::kernel {
int DepthwiseConv2dOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name = "DepthwiseConv2d";
auto in_format = op_format_;
in_ori_format_ = in_tensors_[0]->GetFormat();
@ -69,13 +67,13 @@ int DepthwiseConv2dOpenCLKernel::Init() {
kernel_name += "_1x1";
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::string program_name = "DepthwiseConv2d";
std::set<std::string> build_options;
std::string source = depthwise_conv2d_source;
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
this->InitBuffer();
MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_);
@ -84,9 +82,8 @@ int DepthwiseConv2dOpenCLKernel::Init() {
int DepthwiseConv2dOpenCLKernel::InitBuffer() {
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
bool is_fp16 = ocl_runtime->GetFp16Enable();
auto allocator = ocl_runtime_->GetAllocator();
bool is_fp16 = ocl_runtime_->GetFp16Enable();
// weight: o, h, w, i; o == group, i == 1
void *origin_weight = in_tensors_.at(kWeightIndex)->data_c();
@ -162,7 +159,7 @@ int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *i
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
if (lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) {
if (ocl_runtime_->GetFp16Enable()) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
@ -189,7 +186,6 @@ int DepthwiseConv2dOpenCLKernel::GetLocalSize(size_t idx, const std::vector<size
int DepthwiseConv2dOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4};
@ -207,19 +203,19 @@ int DepthwiseConv2dOpenCLKernel::Run() {
(cl_int)out_tensors_[0]->Batch()};
int arg_cnt = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dilation);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/conv_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {

View File

@ -19,7 +19,6 @@
#include <set>
#include <utility>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/gather.h"
#include "src/runtime/kernel/opencl/cl/gather.cl.inc"
@ -49,9 +48,8 @@ int GatherOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = gather_source;
std::string program_name = "gather";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
// init indices_data_
auto indices_tensor = in_tensors_.at(1);
int indices_num = indices_tensor->ElementsNum();
@ -104,8 +102,7 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@ -117,7 +114,6 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
int GatherOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<GatherParameter *>(this->op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
if (InitBuffer() != RET_OK) {
return RET_ERROR;
@ -134,14 +130,14 @@ int GatherOpenCLKernel::Run() {
std::vector<size_t> local = {1, 1, 1};
std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4};
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, src_size);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, dst_size);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_num);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dst_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_num);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/gather_parameter.h"
namespace mindspore::kernel {

View File

@ -19,7 +19,6 @@
#include <map>
#include "nnacl/fp32/common_func.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/matmul.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/matmul.cl.inc"
@ -35,7 +34,6 @@ namespace mindspore::kernel {
int MatMulOpenCLKernel::Init() {
std::string kernel_name = "MatMul";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto param = reinterpret_cast<MatMulParameter *>(op_parameter_);
transposeA = param->a_transpose_;
if (transposeA) {
@ -43,7 +41,7 @@ int MatMulOpenCLKernel::Init() {
return RET_ERROR;
}
transposeB = param->b_transpose_;
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (in_tensors_[0]->shape().size() != out_tensors_[0]->shape().size() ||
(in_tensors_[0]->shape().size() != 2 && in_tensors_[0]->shape().size() != 4)) {
MS_LOG(ERROR) << "matmul only support input shape size=2 or 4.";
@ -57,13 +55,13 @@ int MatMulOpenCLKernel::Init() {
std::map<int, std::string> dims2str = {{2, "_2d"}, {4, "_4d"}};
kernel_name += dims2str[dims];
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = matmul_source;
std::string program_name = "MatMul";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
PadWeight();
@ -79,7 +77,7 @@ int MatMulOpenCLKernel::ReSize() { return RET_OK; }
void MatMulOpenCLKernel::PadWeight() {
// ABMCI @ ABCICO = ABMCO
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
int ci = inShape[3];
int ci4 = UP_DIV(ci, C4NUM);
int co = outShape[3];
@ -201,7 +199,6 @@ int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
int MatMulOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// local size should less than MAX_GROUP_SIZE
std::vector<size_t> local = {32, 4, 1};
std::vector<size_t> global = {UP_DIV(static_cast<size_t>(outShape[3]), C4NUM),
@ -210,14 +207,14 @@ int MatMulOpenCLKernel::Run() {
int arg_count = 0;
cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_);
ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape);
ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape);
ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -21,7 +21,6 @@
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/matmul_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {

View File

@ -20,8 +20,6 @@
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/opencl/opencl_wrapper.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/image_format.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/avg_pool2d.cl.inc"
@ -59,10 +57,9 @@ int PoolingOpenCLKernel::Init() {
MS_LOG(ERROR) << "Init `Pooling2d` kernel failed!";
return RET_INVALID_OP_NAME;
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
if (out_mem_type_ == OpenCLMemType::BUF) {
@ -72,8 +69,8 @@ int PoolingOpenCLKernel::Init() {
kernel_name += "_IMG";
}
std::set<std::string> build_options;
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@ -124,7 +121,6 @@ int PoolingOpenCLKernel::ReSize() { return RET_OK; }
int PoolingOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices};
@ -135,21 +131,21 @@ int PoolingOpenCLKernel::Run() {
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding);
std::vector<size_t> local_size;
std::vector<size_t> global_size = InitGlobalSize();
int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())());
int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())());
local_size = GetCommonLocalSize(global_size, max_work_group_size);
global_size = GetCommonGlobalSize(local_size, global_size);
ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr);
ocl_runtime_->RunKernel(kernel_, global_size, local_size, nullptr);
return RET_OK;
}

View File

@ -21,7 +21,6 @@
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/pooling.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {

View File

@ -24,7 +24,6 @@
#include "include/errorcode.h"
#include "nnacl/fp32/common_func.h"
#include "src/runtime/kernel/opencl/kernel/prelu.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/prelu.cl.inc"
using mindspore::kernel::KERNEL_ARCH::kGPU;
@ -36,7 +35,7 @@ using mindspore::schema::PrimitiveType_PReLU;
namespace mindspore::kernel {
void PReluOpenCLKernel::InitBuffer() {
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
int elem_num = in_tensors_[0]->shape().size() == 2 ? in_tensors_[0]->shape()[1] : in_tensors_[0]->shape()[3];
int elem_num_c4 = UP_DIV(elem_num, C4NUM);
size_t img_dtype = CL_FLOAT;
@ -91,12 +90,11 @@ int PReluOpenCLKernel::Init() {
std::string source = prelu_source;
std::string program_name = "PRelu";
std::string kernel_name = "PRelu";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
InitBuffer();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
@ -107,18 +105,17 @@ int PReluOpenCLKernel::Init() {
int PReluOpenCLKernel::Run() {
MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::map<schema::Format, int> data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, reinterpret_cast<int>(in_tensors_[1]->shape()[0]));
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, PReluWeight_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, reinterpret_cast<int>(in_tensors_[1]->shape()[0]));
std::vector<size_t> local = {1, 1};
std::vector<size_t> global = {static_cast<size_t>(global_shape_.s[1]), static_cast<size_t>(global_shape_.s[2])};
auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error.";
return RET_ERROR;

View File

@ -22,7 +22,6 @@
#include "src/tensor.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {

View File

@ -19,7 +19,6 @@
#include <map>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/reduce.h"
#include "src/runtime/kernel/opencl/cl/reduce.cl.inc"
@ -59,8 +58,7 @@ int ReduceOpenCLKernel::Init() {
}
std::string kernel_name = reduce_type2str.at(reduce_param->mode_);
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) {
MS_LOG(ERROR) << "Reduce input channel " << in_tensors_[0]->shape().back() << " should equal output channel"
@ -68,12 +66,12 @@ int ReduceOpenCLKernel::Init() {
return RET_ERROR;
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = reduce_source;
ocl_runtime->LoadSource(kernel_name, source);
ocl_runtime->BuildKernel(kernel_, kernel_name, kernel_name, build_options);
ocl_runtime_->LoadSource(kernel_name, source);
ocl_runtime_->BuildKernel(kernel_, kernel_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@ -130,15 +128,14 @@ int ReduceOpenCLKernel::Run() {
int w = shapex[2];
int c = shapex[3];
int c4 = UP_DIV(c, C4NUM);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {};
std::vector<size_t> global = {static_cast<size_t>(c4)};
cl_int4 size = {h, w, c4, 1};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/reduce_parameter.h"

View File

@ -18,7 +18,6 @@
#include <string>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/reshape.h"
#include "src/runtime/kernel/opencl/cl/reshape.cl.inc"
@ -34,8 +33,7 @@ namespace mindspore::kernel {
int ReshapeOpenCLKernel::Init() {
std::string kernel_name = "reshape";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) {
MS_LOG(ERROR) << "Reshape output size should in 2,4";
return RET_ERROR;
@ -46,13 +44,13 @@ int ReshapeOpenCLKernel::Init() {
return RET_ERROR;
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = reshape_source;
std::string program_name = "reshape";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@ -112,17 +110,16 @@ int ReshapeOpenCLKernel::Run() {
oh = out_tensors_[0]->shape()[1];
ow = out_tensors_[0]->shape()[2];
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {};
std::vector<size_t> global = {(size_t)oh, (size_t)ow, (size_t)c4};
cl_int4 size = {h, w, c4, 1};
cl_int4 size_out = {oh, ow, c4, 1};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size_out);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::kernel {

View File

@ -245,7 +245,6 @@ int ScaleOpenCLKernel::InitBuffer() {
}
int ScaleOpenCLKernel::Init() {
ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name;
const ScaleParameter *scale_param = reinterpret_cast<const ScaleParameter *>(op_parameter_);

View File

@ -19,7 +19,6 @@
#include <vector>
#include "nnacl/scale.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::kernel {
@ -42,7 +41,6 @@ class ScaleOpenCLKernel : public OpenCLKernel {
int InitBuffer();
cl::Kernel kernel_;
lite::opencl::OpenCLRuntime *ocl_runtime_;
bool element_flag_{true};
void *scale_ptr_{nullptr};
void *offset_ptr_{nullptr};

View File

@ -18,7 +18,6 @@
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/slice.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/slice.cl.inc"
@ -40,8 +39,7 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@ -71,9 +69,8 @@ int SliceOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = slice_source;
std::string program_name = "slice";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
return RET_OK;
}
@ -96,7 +93,6 @@ void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l
int SliceOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto input_shape = in_tensors_[0]->shape();
cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)};
cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)};
@ -105,18 +101,18 @@ int SliceOpenCLKernel::Run() {
uint32_t OH = param->size_[1];
uint32_t OW = param->size_[2];
const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {1, OH, OW};
SlcieGetWorkGroup(global, &local, max_global[0]);
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/fp32/slice.h"
namespace mindspore::kernel {

View File

@ -19,7 +19,6 @@
#include <set>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/utils.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/softmax.cl.inc"
@ -51,7 +50,7 @@ int SoftmaxOpenCLKernel::InitGlobalSize() {
int SoftmaxOpenCLKernel::SetWorkGroupSize() {
// set work group size
InitGlobalSize();
int max_work_group_size = runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*runtime_->Device())());
int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())());
local_size_ = GetCommonLocalSize(global_size_, max_work_group_size);
global_size_ = GetCommonGlobalSize(local_size_, global_size_);
return lite::RET_OK;
@ -101,8 +100,7 @@ int SoftmaxOpenCLKernel::Init() {
std::string program_name = "SoftMax";
std::string source = softmax_source;
runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = runtime_->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
// framework not set this param yet! just use default.
if (in_tensors_[0]->shape().size() == 4) {
// support 4d tensor
@ -133,8 +131,8 @@ int SoftmaxOpenCLKernel::Init() {
program_name += "_IMG";
}
std::set<std::string> build_options;
runtime_->LoadSource(program_name, source);
runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@ -158,32 +156,32 @@ int SoftmaxOpenCLKernel::Run() {
auto mask_ = GetMaskForLastChannel(channel_size);
cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]};
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (is_image_out_) {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
} else {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
}
runtime_->SetKernelArg(kernel_, arg_idx++, mask);
runtime_->SetKernelArg(kernel_, arg_idx++, slices);
runtime_->SetKernelArg(kernel_, arg_idx, slices_x32);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, slices);
ocl_runtime_->SetKernelArg(kernel_, arg_idx, slices_x32);
SetWorkGroupSize1x1();
} else {
int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices};
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (is_image_out_) {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
} else {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
}
runtime_->SetKernelArg(kernel_, arg_idx, input_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape);
SetWorkGroupSize();
}
// run opencl kernel
runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
return lite::RET_OK;
}

View File

@ -21,7 +21,6 @@
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/softmax.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {
@ -46,7 +45,6 @@ class SoftmaxOpenCLKernel : public OpenCLKernel {
private:
cl::Kernel kernel_;
SoftmaxParameter *parameter_;
lite::opencl::OpenCLRuntime *runtime_;
bool onexone_flag_{false};
std::vector<size_t> local_size_;

View File

@ -21,7 +21,6 @@
#include <utility>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/to_format.cl.inc"
using mindspore::kernel::KERNEL_ARCH::kGPU;
@ -33,7 +32,6 @@ using mindspore::schema::PrimitiveType_ToFormat;
namespace mindspore::kernel {
int ToFormatOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto parameter = reinterpret_cast<OpenCLToFormatParameter *>(op_parameter_);
out_mem_type_ = parameter->out_mem_type;
std::string program_name = "to_format";
@ -53,12 +51,12 @@ int ToFormatOpenCLKernel::Init() {
this->set_name(kernel_name);
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = to_format_source;
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
InitNHWCShape();
MS_LOG(DEBUG) << kernel_name << " Init Done!";
@ -147,7 +145,7 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
return RET_ERROR;
}
img_size->clear();
auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
@ -158,7 +156,6 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
}
int ToFormatOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {};
std::vector<size_t> global;
GetGlobalSize(0, &global);
@ -167,11 +164,11 @@ int ToFormatOpenCLKernel::Run() {
cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1};
auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG;
auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF;
ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type);
ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type);
ocl_runtime->SetKernelArg(kernel_, 2, gsize);
ocl_runtime->SetKernelArg(kernel_, 3, shape);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type);
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type);
ocl_runtime_->SetKernelArg(kernel_, 2, gsize);
ocl_runtime_->SetKernelArg(kernel_, 3, shape);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -20,7 +20,6 @@
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::kernel {

View File

@ -18,7 +18,6 @@
#include <string>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/transpose.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/transpose.cl.inc"
@ -34,8 +33,7 @@ namespace mindspore::kernel {
int TransposeOpenCLKernel::Init() {
std::string kernel_name = "transpose";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
auto param = reinterpret_cast<TransposeParameter *>(op_parameter_);
if (param->num_axes_ == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 &&
param->perm_[3] == 2) {
@ -52,13 +50,13 @@ int TransposeOpenCLKernel::Init() {
kernel_name += "_IMG";
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = transpose_source;
std::string program_name = "transpose";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
if ((in_tensors_[0]->shape()[1] * in_tensors_[0]->shape()[2]) % 4 != 0) {
MS_LOG(ERROR) << "input H * W % 4 != 0 not support!";
@ -114,24 +112,23 @@ int TransposeOpenCLKernel::Run() {
int c = shapex[3];
int c4 = UP_DIV(c, 4);
int hw4 = UP_DIV(h * w, 4);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {16, 16};
std::vector<size_t> global = {UP_ROUND(hw4, local[0]), UP_ROUND(c4, local[1])};
cl_int2 HW = {h * w, hw4};
cl_int2 C = {c, c4};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (out_mem_type_ == OpenCLMemType::BUF) {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
} else {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
}
ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, C);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, w);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, h);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, HW);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, C);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, w);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, h);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

View File

@ -21,7 +21,6 @@
#include "src/lite_kernel.h"
#include "nnacl/transpose.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
namespace mindspore::kernel {

View File

@ -20,6 +20,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {
@ -36,7 +37,16 @@ class OpenCLKernel : public LiteKernel {
public:
explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {
ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
}
// Drops this kernel's reference to the OpenCL runtime. If a runtime pointer is
// still held, OpenCLRuntime::DeleteInstance() is called once and the pointer is
// cleared; otherwise nothing happens.
// NOTE(review): presumably balances the GetInstance() call made in the
// constructor - confirm against OpenCLRuntime's instance bookkeeping.
~OpenCLKernel() {
if (ocl_runtime_ == nullptr) {
return;
}
lite::opencl::OpenCLRuntime::DeleteInstance();
ocl_runtime_ = nullptr;
}
// Default Init(): returns RET_ERROR. Concrete OpenCL kernels provide their own Init().
virtual int Init() { return RET_ERROR; }
// Default Prepare(): returns RET_ERROR unless a derived kernel overrides it.
virtual int Prepare() { return RET_ERROR; }
@ -59,6 +69,7 @@ class OpenCLKernel : public LiteKernel {
schema::Format in_ori_format_{schema::Format::Format_NHWC};
schema::Format out_ori_format_{schema::Format::Format_NHWC4};
schema::Format op_format_{schema::Format::Format_NHWC4};
lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr};
};
} // namespace mindspore::kernel

View File

@ -99,7 +99,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
out_tensors->emplace_back(new_tensor);
KernelKey desc{kGPU, kNumberTypeFloat32, schema::PrimitiveType_ToFormat};
if (mem_type == OpenCLMemType::IMG && lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) {
if (mem_type == OpenCLMemType::IMG && ocl_runtime_->GetFp16Enable()) {
desc.data_type = kNumberTypeFloat16;
new_tensor->set_data_type(kNumberTypeFloat16);
}
@ -160,7 +160,8 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
}
int SubGraphOpenCLKernel::Init() {
allocator_ = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
allocator_ = ocl_runtime_->GetAllocator();
MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size();
for (const auto tensor : in_tensors_) {
tensor->set_allocator(allocator_);
@ -195,8 +196,7 @@ int SubGraphOpenCLKernel::Init() {
}
int SubGraphOpenCLKernel::UpdateTensorDataType() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
bool is_fp16 = ocl_runtime->GetFp16Enable();
bool is_fp16 = ocl_runtime_->GetFp16Enable();
if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) {
std::set<lite::Tensor *> out_set;
out_set.insert(in_tensors_.begin(), in_tensors_.end());
@ -292,16 +292,25 @@ int SubGraphOpenCLKernel::UnInit() {
delete tensor;
}
}
in_convert_tensors_.clear();
for (const auto &tensor : out_convert_tensors_) {
if (tensor != nullptr) {
delete tensor;
}
}
for (const auto &op : in_convert_ops_) {
out_convert_tensors_.clear();
for (const auto &op : nodes_) {
if (op != nullptr) {
delete op;
}
}
nodes_.clear();
in_convert_ops_.clear();
out_convert_ops_.clear();
if (ocl_runtime_ != nullptr) {
lite::opencl::OpenCLRuntime::DeleteInstance();
ocl_runtime_ = nullptr;
}
return RET_OK;
}
@ -310,14 +319,13 @@ int SubGraphOpenCLKernel::InferShape() { return RET_OK; }
// ReSize() performs no work for the OpenCL sub-graph and always reports RET_OK.
int SubGraphOpenCLKernel::ReSize() { return RET_OK; }
int SubGraphOpenCLKernel::Run() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
for (auto &tensor : in_tensors_) {
allocator_->UnmapBuffer(tensor->data_c());
}
lite::opencl::OpenCLExecutor executor;
executor.Run(in_tensors_, out_tensors_, nodes_, allocator_);
ocl_runtime->SyncCommandQueue();
ocl_runtime_->SyncCommandQueue();
return RET_OK;
}

View File

@ -64,6 +64,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
std::vector<OpenCLToFormatParameter *> out_parameters_;
std::vector<LiteKernel *> in_convert_ops_;
std::vector<LiteKernel *> out_convert_ops_;
lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr};
};
} // namespace mindspore::kernel

View File

@ -23,8 +23,6 @@
namespace mindspore::lite::opencl {
OpenCLAllocator::OpenCLAllocator() {}
OpenCLAllocator::OpenCLAllocator(OpenCLRuntime *ocl_runtime) : ocl_runtime_(ocl_runtime) {}
OpenCLAllocator::~OpenCLAllocator() { Clear(); }
@ -49,9 +47,6 @@ void OpenCLAllocator::UnLock() {
// Convenience overload: forwards to Malloc(size, img_size) with an empty img_size vector
// and returns whatever that overload returns.
void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector<size_t>{}); }
void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size) {
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
size_t img_pitch = 0;
@ -144,9 +139,6 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v
MS_LOG(ERROR) << "MallocData out of max_size, size: " << size;
return nullptr;
}
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
Lock();
auto iter = free_list_.lower_bound(size);
while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) {
@ -258,9 +250,6 @@ void *OpenCLAllocator::GetBuffer(void *buffer) {
void OpenCLAllocator::Clear() {
Lock();
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) {
if (svm_capabilities) {
@ -306,9 +295,6 @@ void OpenCLAllocator::Clear() {
}
void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
if (svm_capabilities) {
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
@ -362,9 +348,6 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
}
int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
if (svm_capabilities) {
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {

View File

@ -45,7 +45,6 @@ enum class MemType : char { SVM, BUF, IMG };
class OpenCLAllocator : public Allocator {
public:
OpenCLAllocator();
explicit OpenCLAllocator(OpenCLRuntime *ocl_runtime);
~OpenCLAllocator() override;
void SetContext(const AllocatorContext &ctx) override;

View File

@ -27,7 +27,11 @@
namespace mindspore::lite::opencl {
class OpenCLExecutor : Executor {
public:
OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); }
// Caches the allocator obtained from the OpenCL runtime instance.
// The runtime is acquired only to read its allocator; the matching
// DeleteInstance() call follows immediately.
// NOTE(review): allocator_ is kept after DeleteInstance(); presumably the runtime
// stays alive through other holders for the executor's lifetime - confirm.
OpenCLExecutor() : Executor() {
allocator_ = OpenCLRuntime::GetInstance()->GetAllocator();
OpenCLRuntime::DeleteInstance();
}
int Prepare(const std::vector<kernel::LiteKernel *> &kernels);

View File

@ -244,7 +244,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())};
#if SUPPORT_GPU
if (context_->device_type_ == DT_GPU && lite::opencl::OpenCLRuntime::GetInstance()->IsInitOK()) {
if (context_->device_type_ == DT_GPU) {
desc.arch = kernel::KERNEL_ARCH::kGPU;
auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc);
if (kernel != nullptr) {

View File

@ -157,7 +157,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -167,7 +166,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -182,7 +180,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
printf_tensor<float>("ReluFp32--output data--", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -271,7 +268,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -281,7 +277,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -297,7 +292,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
printf_tensor<float>("Relu6:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -386,7 +380,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -396,7 +389,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -412,7 +404,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
printf_tensor<float>("Sigmoid:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -502,7 +493,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -512,7 +502,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -527,7 +516,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
printf_tensor<float>("Leaky Relu:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -616,7 +604,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -626,7 +613,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@ -642,7 +628,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
printf_tensor<float>("Tanh:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;

View File

@ -127,7 +127,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) {
delete tensor;
}
delete param;
delete arithmeticself_kernel;
delete sub_graph;
}
} // namespace mindspore

View File

@ -203,7 +203,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
delete[] data_c_ocl;
delete kernel;
delete arith_kernel;
delete param;
for (auto tensor : inputs) {
delete tensor;

View File

@ -147,7 +147,6 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
delete tensor;
}
delete param;
delete batchnorm_kernel;
delete sub_graph;
}
TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {

View File

@ -174,7 +174,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete weight_tensor;
delete sub_graph;
delete param;
delete biasadd_kernel;
return;
}
MS_LOG(INFO) << "Sub graph begin running!";
@ -186,7 +185,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete weight_tensor;
delete sub_graph;
delete param;
delete biasadd_kernel;
return;
}
@ -202,7 +200,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete output_tensor;
delete sub_graph;
delete param;
delete biasadd_kernel;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

View File

@ -164,7 +164,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
delete tensor;
}
delete param;
delete concat_kernel;
delete sub_graph;
}
@ -284,7 +283,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
delete tensor;
}
delete param;
delete concat_kernel;
delete sub_graph;
}
} // namespace mindspore

View File

@ -78,7 +78,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in
std::cout << "==================output data================" << std::endl;
auto *output_data = reinterpret_cast<T *>(outputs[0]->data_c());
CommonTest::CompareOutputData<T>(output_data, static_cast<T*>(correct_data), outputs[0]->ElementsNum(), 0.0001);
delete pkernel;
delete sub_graph;
}
TEST_F(TestGatherOpenCL, Axis1Fp32) {

View File

@ -167,7 +167,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete output_tensor;
delete weight_tensor;
delete param;
delete prelu_kernel;
delete sub_graph;
return;
}
@ -179,7 +178,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete output_tensor;
delete weight_tensor;
delete param;
delete prelu_kernel;
delete sub_graph;
return;
}
@ -195,7 +193,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete output_tensor;
delete weight_tensor;
delete param;
delete prelu_kernel;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}

View File

@ -223,7 +223,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
delete[] data_out_ocl;
delete kernel;
delete scale_kernel;
delete param;
for (auto tensor : inputs) {
delete tensor;

View File

@ -143,7 +143,6 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
for (auto tensor : outputs) {
delete tensor;
}
delete slice_kernel;
delete sub_graph;
}
TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
@ -251,7 +250,6 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
for (auto tensor : outputs) {
delete tensor;
}
delete slice_kernel;
delete sub_graph;
}
} // namespace mindspore