Code cleanup for OpenCL

This commit is contained in:
chenzomi 2020-08-19 18:45:30 +08:00
parent f20e68a879
commit 990c645c85
5 changed files with 70 additions and 39 deletions

View File

@ -112,7 +112,6 @@ int PoolingOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!"; MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// attribute
int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM); int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices}; cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices};
cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices}; cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices};
@ -120,7 +119,6 @@ int PoolingOpenCLKernel::Run() {
cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
// binding parameters
int arg_idx = 0; int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
@ -130,14 +128,12 @@ int PoolingOpenCLKernel::Run() {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size); ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding); ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding);
// set work group size
std::vector<size_t> local_size; std::vector<size_t> local_size;
std::vector<size_t> global_size = InitGlobalSize(); std::vector<size_t> global_size = InitGlobalSize();
int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())()); int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())());
local_size = GetCommonLocalSize(global_size, max_work_group_size); local_size = GetCommonLocalSize(global_size, max_work_group_size);
global_size = GetCommonGlobalSize(local_size, global_size); global_size = GetCommonGlobalSize(local_size, global_size);
// run opengl kernel
ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr); ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr);
return RET_OK; return RET_OK;
} }

View File

@ -162,10 +162,13 @@ kernel::LiteKernel *OpenCLSoftMaxKernelCreator(const std::vector<lite::tensor::T
auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
if (kernel == nullptr) { if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr."; MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr.";
delete kernel;
return nullptr; return nullptr;
} }
if (inputs[0]->shape()[0] > 1) { if (inputs[0]->shape()[0] > 1) {
MS_LOG(ERROR) << "Init `Softmax` kernel failed: Unsupported multi-batch."; MS_LOG(ERROR) << "Init `Softmax` kernel failed: Unsupported multi-batch.";
delete kernel;
return nullptr;
} }
auto ret = kernel->Init(); auto ret = kernel->Init();
if (0 != ret) { if (0 != ret) {

View File

@ -58,7 +58,7 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
ocl_runtime->Init(); ocl_runtime->Init();
MS_LOG(INFO) << "create PoolingParameter"; MS_LOG(INFO) << "create PoolingParameter";
auto param = new PoolingParameter(); auto param = new (std::nothrow) PoolingParameter();
InitAvgPoolingParam(param); InitAvgPoolingParam(param);
MS_LOG(INFO) << "create Tensors"; MS_LOG(INFO) << "create Tensors";
@ -76,18 +76,37 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
}; };
auto data_type = kNumberTypeFloat32; auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode; auto tensorType = schema::NodeType_ValueNode;
lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType); lite::tensor::Tensor *tensor_in =
lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType); new (std::nothrow) lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType);
lite::tensor::Tensor *tensor_out =
new (std::nothrow) lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType);
if (tensor_in == nullptr) {
MS_LOG(ERROR) << "tensor_in null";
return;
}
if (tensor_out == nullptr) {
MS_LOG(ERROR) << "tensor_out null";
return;
}
std::vector<lite::tensor::Tensor *> inputs{tensor_in}; std::vector<lite::tensor::Tensor *> inputs{tensor_in};
std::vector<lite::tensor::Tensor *> outputs{tensor_out}; std::vector<lite::tensor::Tensor *> outputs{tensor_out};
MS_LOG(INFO) << "create OpenCL Kernel"; MS_LOG(INFO) << "create OpenCL Kernel";
auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); auto *pooling_kernel =
new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (pooling_kernel == nullptr) {
MS_LOG(ERROR) << "pooling_kernel null";
return;
}
pooling_kernel->Init(); pooling_kernel->Init();
std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
MS_LOG(INFO) << "create SubGraphOpenCLKernel"; MS_LOG(INFO) << "create SubGraphOpenCLKernel";
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph null";
return;
}
pGraph->Init(); pGraph->Init();
MS_LOG(INFO) << "initialize data"; MS_LOG(INFO) << "initialize data";

View File

@ -46,7 +46,7 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
auto allocator = ocl_runtime->GetAllocator(); auto allocator = ocl_runtime->GetAllocator();
MS_LOG(INFO) << "PoolingParameter"; MS_LOG(INFO) << "PoolingParameter";
auto param = new PoolingParameter; auto param = new (std::nothrow) PoolingParameter;
InitParameter(param); InitParameter(param);
// define tensor // define tensor
@ -56,21 +56,39 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
auto data_type = kNumberTypeFloat32; auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode; auto tensorType = schema::NodeType_ValueNode;
MS_LOG(INFO) << "define tensor2"; MS_LOG(INFO) << "define tensor2";
auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType); auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType);
auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType); auto output_tensor =
new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType);
if (input_tensor == nullptr) {
MS_LOG(ERROR) << "input_tensor null";
return;
}
if (output_tensor == nullptr) {
MS_LOG(ERROR) << "output_tensor null";
return;
}
MS_LOG(INFO) << "define input"; MS_LOG(INFO) << "define input";
std::vector<lite::tensor::Tensor *> inputs{input_tensor}; std::vector<lite::tensor::Tensor *> inputs{input_tensor};
std::vector<lite::tensor::Tensor *> outputs{output_tensor}; std::vector<lite::tensor::Tensor *> outputs{output_tensor};
// run // run
MS_LOG(INFO) << "pooling_kernel"; MS_LOG(INFO) << "pooling_kernel";
auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); auto *pooling_kernel =
new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (pooling_kernel == nullptr) {
MS_LOG(ERROR) << "pooling_kernel null";
return;
}
MS_LOG(INFO) << "pooling_kernel init"; MS_LOG(INFO) << "pooling_kernel init";
pooling_kernel->Init(); pooling_kernel->Init();
std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
inputs[0]->MallocData(allocator); inputs[0]->MallocData(allocator);
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph null";
return;
}
MS_LOG(INFO) << "pGraph init"; MS_LOG(INFO) << "pGraph init";
pGraph->Init(); pGraph->Init();

View File

@ -28,41 +28,49 @@ class TestSoftmaxOpenCL : public mindspore::CommonTest {};
void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file, void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file,
std::string expect_file, SoftmaxParameter *param, schema::Format format) { std::string expect_file, SoftmaxParameter *param, schema::Format format) {
std::cout << "runtime" << std::endl;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->Init(); ocl_runtime->Init();
auto allocator = ocl_runtime->GetAllocator(); auto allocator = ocl_runtime->GetAllocator();
// define tensor // define tensor
MS_LOG(INFO) << "defineTensor"; MS_LOG(INFO) << "defineTensor";
std::cout << "defineTensor" << std::endl;
auto data_type = kNumberTypeFloat32; auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode; auto tensorType = schema::NodeType_ValueNode;
auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, format, tensorType); auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensorType);
auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, format, tensorType); auto output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, format, tensorType);
if (input_tensor == nullptr) {
MS_LOG(ERROR) << "input tensor null";
return;
}
if (output_tensor == nullptr) {
MS_LOG(ERROR) << "output tensor null";
return;
}
std::vector<lite::tensor::Tensor *> inputs{input_tensor}; std::vector<lite::tensor::Tensor *> inputs{input_tensor};
std::vector<lite::tensor::Tensor *> outputs{output_tensor}; std::vector<lite::tensor::Tensor *> outputs{output_tensor};
// run // run
MS_LOG(INFO) << "NewOpenCLKernel"; MS_LOG(INFO) << "NewOpenCLKernel";
std::cout << "NewOpenCLKernel" << std::endl;
auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel null";
return;
}
MS_LOG(INFO) << "KernelInit"; MS_LOG(INFO) << "KernelInit";
std::cout << "KernelInit" << std::endl;
kernel->Init(); kernel->Init();
std::cout << "LiteKernel" << std::endl;
std::vector<kernel::LiteKernel *> kernels{kernel}; std::vector<kernel::LiteKernel *> kernels{kernel};
inputs[0]->MallocData(allocator); inputs[0]->MallocData(allocator);
std::cout << "SubGraphOpenCLKernel" << std::endl; auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph null";
return;
}
MS_LOG(INFO) << "pGraphinit"; MS_LOG(INFO) << "pGraphinit";
pGraph->Init(); pGraph->Init();
// load data // load data
MS_LOG(INFO) << "load data1"; MS_LOG(INFO) << "load data1";
LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file); LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file);
auto *input_data = reinterpret_cast<float *>(input_tensor->Data()); auto *input_data = reinterpret_cast<float *>(input_tensor->Data());
printf("\ninput[0:10]:"); printf("\ninput[0:10]:");
@ -75,7 +83,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
pGraph->Run(); pGraph->Run();
MS_LOG(INFO) << "compare result"; MS_LOG(INFO) << "compare result";
std::cout << "compare result" << std::endl;
CompareOutput(output_tensor, expect_file); CompareOutput(output_tensor, expect_file);
} }
@ -84,23 +91,11 @@ TEST_F(TestSoftmaxOpenCL, Softmax_1) {
std::vector<int> output_shape = {1, 2, 2, 8}; std::vector<int> output_shape = {1, 2, 2, 8};
std::string input_file = "softmax_in.bin"; std::string input_file = "softmax_in.bin";
std::string expect_file = "softmax_out.bin"; std::string expect_file = "softmax_out.bin";
auto param = new SoftmaxParameter; auto param = new (std::nothrow) SoftmaxParameter;
param->axis_ = 3; param->axis_ = 3;
schema::Format format = schema::Format_NHWC4; schema::Format format = schema::Format_NHWC4;
RunTestCase(input_shape, output_shape, input_file, expect_file, param, format); RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
} }
// TEST_F(TestSoftmaxOpenCL, Softmax_1x1) {
// std::vector<int> input_shape = {1, 100};
// std::vector<int> output_shape = {1, 100};
// std::string input_file = "softmax1x1_in.bin";
// std::string expect_file = "softmax1x1_out.bin";
// auto param = new SoftmaxParameter;
// param->axis_ = 1;
// schema::Format format = schema::Format_NHWC4;
//
// RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
//}
} // namespace mindspore } // namespace mindspore