forked from mindspore-Ecosystem/mindspore
code clean for opencl
This commit is contained in:
parent
f20e68a879
commit
990c645c85
|
@ -112,7 +112,6 @@ int PoolingOpenCLKernel::Run() {
|
||||||
MS_LOG(DEBUG) << this->name() << " Running!";
|
MS_LOG(DEBUG) << this->name() << " Running!";
|
||||||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||||
|
|
||||||
// attribute
|
|
||||||
int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
|
int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
|
||||||
cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices};
|
cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices};
|
||||||
cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices};
|
cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices};
|
||||||
|
@ -120,7 +119,6 @@ int PoolingOpenCLKernel::Run() {
|
||||||
cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
|
cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
|
||||||
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
|
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
|
||||||
|
|
||||||
// binding parameters
|
|
||||||
int arg_idx = 0;
|
int arg_idx = 0;
|
||||||
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
|
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
|
||||||
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
|
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
|
||||||
|
@ -130,14 +128,12 @@ int PoolingOpenCLKernel::Run() {
|
||||||
ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size);
|
ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size);
|
||||||
ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding);
|
ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding);
|
||||||
|
|
||||||
// set work group size
|
|
||||||
std::vector<size_t> local_size;
|
std::vector<size_t> local_size;
|
||||||
std::vector<size_t> global_size = InitGlobalSize();
|
std::vector<size_t> global_size = InitGlobalSize();
|
||||||
int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())());
|
int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())());
|
||||||
local_size = GetCommonLocalSize(global_size, max_work_group_size);
|
local_size = GetCommonLocalSize(global_size, max_work_group_size);
|
||||||
global_size = GetCommonGlobalSize(local_size, global_size);
|
global_size = GetCommonGlobalSize(local_size, global_size);
|
||||||
|
|
||||||
// run opengl kernel
|
|
||||||
ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr);
|
ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr);
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
|
@ -162,10 +162,13 @@ kernel::LiteKernel *OpenCLSoftMaxKernelCreator(const std::vector<lite::tensor::T
|
||||||
auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
|
auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
|
||||||
if (kernel == nullptr) {
|
if (kernel == nullptr) {
|
||||||
MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr.";
|
MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr.";
|
||||||
|
delete kernel;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (inputs[0]->shape()[0] > 1) {
|
if (inputs[0]->shape()[0] > 1) {
|
||||||
MS_LOG(ERROR) << "Init `Softmax` kernel failed: Unsupported multi-batch.";
|
MS_LOG(ERROR) << "Init `Softmax` kernel failed: Unsupported multi-batch.";
|
||||||
|
delete kernel;
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto ret = kernel->Init();
|
auto ret = kernel->Init();
|
||||||
if (0 != ret) {
|
if (0 != ret) {
|
||||||
|
|
|
@ -58,7 +58,7 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
|
||||||
ocl_runtime->Init();
|
ocl_runtime->Init();
|
||||||
|
|
||||||
MS_LOG(INFO) << "create PoolingParameter";
|
MS_LOG(INFO) << "create PoolingParameter";
|
||||||
auto param = new PoolingParameter();
|
auto param = new (std::nothrow) PoolingParameter();
|
||||||
InitAvgPoolingParam(param);
|
InitAvgPoolingParam(param);
|
||||||
|
|
||||||
MS_LOG(INFO) << "create Tensors";
|
MS_LOG(INFO) << "create Tensors";
|
||||||
|
@ -76,18 +76,37 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
|
||||||
};
|
};
|
||||||
auto data_type = kNumberTypeFloat32;
|
auto data_type = kNumberTypeFloat32;
|
||||||
auto tensorType = schema::NodeType_ValueNode;
|
auto tensorType = schema::NodeType_ValueNode;
|
||||||
lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType);
|
lite::tensor::Tensor *tensor_in =
|
||||||
lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType);
|
new (std::nothrow) lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType);
|
||||||
|
lite::tensor::Tensor *tensor_out =
|
||||||
|
new (std::nothrow) lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType);
|
||||||
|
if (tensor_in == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "tensor_in null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (tensor_out == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "tensor_out null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
std::vector<lite::tensor::Tensor *> inputs{tensor_in};
|
std::vector<lite::tensor::Tensor *> inputs{tensor_in};
|
||||||
std::vector<lite::tensor::Tensor *> outputs{tensor_out};
|
std::vector<lite::tensor::Tensor *> outputs{tensor_out};
|
||||||
|
|
||||||
MS_LOG(INFO) << "create OpenCL Kernel";
|
MS_LOG(INFO) << "create OpenCL Kernel";
|
||||||
auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
|
auto *pooling_kernel =
|
||||||
|
new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
|
||||||
|
if (pooling_kernel == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "pooling_kernel null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
pooling_kernel->Init();
|
pooling_kernel->Init();
|
||||||
std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
|
std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
|
||||||
|
|
||||||
MS_LOG(INFO) << "create SubGraphOpenCLKernel";
|
MS_LOG(INFO) << "create SubGraphOpenCLKernel";
|
||||||
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
||||||
|
if (pGraph == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "pGraph null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
pGraph->Init();
|
pGraph->Init();
|
||||||
|
|
||||||
MS_LOG(INFO) << "initialize data";
|
MS_LOG(INFO) << "initialize data";
|
||||||
|
|
|
@ -46,7 +46,7 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
|
||||||
auto allocator = ocl_runtime->GetAllocator();
|
auto allocator = ocl_runtime->GetAllocator();
|
||||||
|
|
||||||
MS_LOG(INFO) << "PoolingParameter";
|
MS_LOG(INFO) << "PoolingParameter";
|
||||||
auto param = new PoolingParameter;
|
auto param = new (std::nothrow) PoolingParameter;
|
||||||
InitParameter(param);
|
InitParameter(param);
|
||||||
|
|
||||||
// define tensor
|
// define tensor
|
||||||
|
@ -56,21 +56,39 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
|
||||||
auto data_type = kNumberTypeFloat32;
|
auto data_type = kNumberTypeFloat32;
|
||||||
auto tensorType = schema::NodeType_ValueNode;
|
auto tensorType = schema::NodeType_ValueNode;
|
||||||
MS_LOG(INFO) << "define tensor2";
|
MS_LOG(INFO) << "define tensor2";
|
||||||
auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType);
|
auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType);
|
||||||
auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType);
|
auto output_tensor =
|
||||||
|
new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType);
|
||||||
|
if (input_tensor == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "input_tensor null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (output_tensor == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "output_tensor null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
MS_LOG(INFO) << "define input";
|
MS_LOG(INFO) << "define input";
|
||||||
std::vector<lite::tensor::Tensor *> inputs{input_tensor};
|
std::vector<lite::tensor::Tensor *> inputs{input_tensor};
|
||||||
std::vector<lite::tensor::Tensor *> outputs{output_tensor};
|
std::vector<lite::tensor::Tensor *> outputs{output_tensor};
|
||||||
|
|
||||||
// run
|
// run
|
||||||
MS_LOG(INFO) << "pooling_kernel";
|
MS_LOG(INFO) << "pooling_kernel";
|
||||||
auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
|
auto *pooling_kernel =
|
||||||
|
new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
|
||||||
|
if (pooling_kernel == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "pooling_kernel null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
MS_LOG(INFO) << "pooling_kernel init";
|
MS_LOG(INFO) << "pooling_kernel init";
|
||||||
pooling_kernel->Init();
|
pooling_kernel->Init();
|
||||||
|
|
||||||
std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
|
std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
|
||||||
inputs[0]->MallocData(allocator);
|
inputs[0]->MallocData(allocator);
|
||||||
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
||||||
|
if (pGraph == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "pGraph null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
MS_LOG(INFO) << "pGraph init";
|
MS_LOG(INFO) << "pGraph init";
|
||||||
pGraph->Init();
|
pGraph->Init();
|
||||||
|
|
||||||
|
|
|
@ -28,41 +28,49 @@ class TestSoftmaxOpenCL : public mindspore::CommonTest {};
|
||||||
|
|
||||||
void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file,
|
void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file,
|
||||||
std::string expect_file, SoftmaxParameter *param, schema::Format format) {
|
std::string expect_file, SoftmaxParameter *param, schema::Format format) {
|
||||||
std::cout << "runtime" << std::endl;
|
|
||||||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||||
ocl_runtime->Init();
|
ocl_runtime->Init();
|
||||||
auto allocator = ocl_runtime->GetAllocator();
|
auto allocator = ocl_runtime->GetAllocator();
|
||||||
|
|
||||||
// define tensor
|
// define tensor
|
||||||
MS_LOG(INFO) << "defineTensor";
|
MS_LOG(INFO) << "defineTensor";
|
||||||
std::cout << "defineTensor" << std::endl;
|
|
||||||
|
|
||||||
auto data_type = kNumberTypeFloat32;
|
auto data_type = kNumberTypeFloat32;
|
||||||
auto tensorType = schema::NodeType_ValueNode;
|
auto tensorType = schema::NodeType_ValueNode;
|
||||||
auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, format, tensorType);
|
auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensorType);
|
||||||
auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, format, tensorType);
|
auto output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, format, tensorType);
|
||||||
|
if (input_tensor == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "input tensor null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (output_tensor == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "output tensor null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
std::vector<lite::tensor::Tensor *> inputs{input_tensor};
|
std::vector<lite::tensor::Tensor *> inputs{input_tensor};
|
||||||
std::vector<lite::tensor::Tensor *> outputs{output_tensor};
|
std::vector<lite::tensor::Tensor *> outputs{output_tensor};
|
||||||
|
|
||||||
// run
|
// run
|
||||||
MS_LOG(INFO) << "NewOpenCLKernel";
|
MS_LOG(INFO) << "NewOpenCLKernel";
|
||||||
std::cout << "NewOpenCLKernel" << std::endl;
|
|
||||||
auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
|
auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
|
||||||
|
if (kernel == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "kernel null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
MS_LOG(INFO) << "KernelInit";
|
MS_LOG(INFO) << "KernelInit";
|
||||||
std::cout << "KernelInit" << std::endl;
|
|
||||||
kernel->Init();
|
kernel->Init();
|
||||||
|
|
||||||
std::cout << "LiteKernel" << std::endl;
|
|
||||||
std::vector<kernel::LiteKernel *> kernels{kernel};
|
std::vector<kernel::LiteKernel *> kernels{kernel};
|
||||||
inputs[0]->MallocData(allocator);
|
inputs[0]->MallocData(allocator);
|
||||||
std::cout << "SubGraphOpenCLKernel" << std::endl;
|
auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
||||||
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
|
if (pGraph == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "pGraph null";
|
||||||
|
return;
|
||||||
|
}
|
||||||
MS_LOG(INFO) << "pGraphinit";
|
MS_LOG(INFO) << "pGraphinit";
|
||||||
pGraph->Init();
|
pGraph->Init();
|
||||||
|
|
||||||
// load data
|
// load data
|
||||||
MS_LOG(INFO) << "load data1";
|
MS_LOG(INFO) << "load data1";
|
||||||
|
|
||||||
LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file);
|
LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file);
|
||||||
auto *input_data = reinterpret_cast<float *>(input_tensor->Data());
|
auto *input_data = reinterpret_cast<float *>(input_tensor->Data());
|
||||||
printf("\ninput[0:10]:");
|
printf("\ninput[0:10]:");
|
||||||
|
@ -75,7 +83,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
|
||||||
pGraph->Run();
|
pGraph->Run();
|
||||||
|
|
||||||
MS_LOG(INFO) << "compare result";
|
MS_LOG(INFO) << "compare result";
|
||||||
std::cout << "compare result" << std::endl;
|
|
||||||
CompareOutput(output_tensor, expect_file);
|
CompareOutput(output_tensor, expect_file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,23 +91,11 @@ TEST_F(TestSoftmaxOpenCL, Softmax_1) {
|
||||||
std::vector<int> output_shape = {1, 2, 2, 8};
|
std::vector<int> output_shape = {1, 2, 2, 8};
|
||||||
std::string input_file = "softmax_in.bin";
|
std::string input_file = "softmax_in.bin";
|
||||||
std::string expect_file = "softmax_out.bin";
|
std::string expect_file = "softmax_out.bin";
|
||||||
auto param = new SoftmaxParameter;
|
auto param = new (std::nothrow) SoftmaxParameter;
|
||||||
param->axis_ = 3;
|
param->axis_ = 3;
|
||||||
schema::Format format = schema::Format_NHWC4;
|
schema::Format format = schema::Format_NHWC4;
|
||||||
|
|
||||||
RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
|
RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TEST_F(TestSoftmaxOpenCL, Softmax_1x1) {
|
|
||||||
// std::vector<int> input_shape = {1, 100};
|
|
||||||
// std::vector<int> output_shape = {1, 100};
|
|
||||||
// std::string input_file = "softmax1x1_in.bin";
|
|
||||||
// std::string expect_file = "softmax1x1_out.bin";
|
|
||||||
// auto param = new SoftmaxParameter;
|
|
||||||
// param->axis_ = 1;
|
|
||||||
// schema::Format format = schema::Format_NHWC4;
|
|
||||||
//
|
|
||||||
// RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
|
|
||||||
//}
|
|
||||||
|
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
Loading…
Reference in New Issue