From f67838812482959089a38d7db55153612d2b5bdc Mon Sep 17 00:00:00 2001 From: Pengyongrong Date: Tue, 18 Aug 2020 20:48:41 -0700 Subject: [PATCH] add new ops named slice add ops supported fp16 add dimension 0-3 for concat --- .../src/runtime/kernel/opencl/cl/concat.cl | 107 +++++++-- .../runtime/kernel/opencl/kernel/batchnorm.cc | 6 +- .../runtime/kernel/opencl/kernel/concat.cc | 79 +++---- .../src/runtime/kernel/opencl/kernel/concat.h | 2 - .../src/runtime/kernel/opencl/kernel/slice.cc | 6 +- .../runtime/kernel/opencl/kernel/to_format.cc | 8 +- .../runtime/kernel/opencl/batchnorm_tests.cc | 57 ++--- .../src/runtime/kernel/opencl/concat_tests.cc | 208 ++++++------------ .../src/runtime/kernel/opencl/slice_tests.cc | 61 +++-- 9 files changed, 249 insertions(+), 285 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl index c2ae7c91063..b9ad6ad6776 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/concat.cl @@ -2,39 +2,106 @@ __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; __kernel void Concat(__read_only image2d_t input0, __read_only image2d_t input1, __write_only image2d_t output, - int2 input_channels, int4 output_shape) { - int X = get_global_id(0); // H + int4 input_shape0, int4 input_shape1, int4 output_shape, const int axis) { + int X = get_global_id(0); // N*H int Y = get_global_id(1); // W int Z = get_global_id(2); // c/4 - if (X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { + if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { return; } - if (Z < input_channels.x) { - FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X))); - WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + if (axis == 0) { + if (X < input_shape0.x * input_shape0.y) { + FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } else { + FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.x * input_shape0.y))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } + } else if (axis == 1) { + if (X < input_shape0.y) { + FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } else { + FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } + } else if (axis == 2) { + if (Y < input_shape0.z) { + FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } else { + FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } } else { - FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X))); - WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + if (Z < input_shape0.w) { + FLT4 result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } else { + FLT4 result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); + } } } __kernel void Concat3input(__read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, - __write_only image2d_t output, int3 input_channels, int4 output_shape) { - int X = get_global_id(0); // H + __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, + int4 output_shape, const int axis) { + int X = get_global_id(0); // N*H int Y = get_global_id(1); // W int Z = get_global_id(2); // c/4 - if (X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { + if (X >= output_shape.x * output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { return; } - if (Z < input_channels.x) { - FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_channels.x + Z, (X))); - WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0); - } else if (Z < (input_channels.x + input_channels.y)) { - FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_channels.y + Z - input_channels.x, (X))); - WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1); + if (axis == 0) { + if (X < input_shape0.x * input_shape0.y) { + FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0); + } else if (X < (input_shape0.x * input_shape0.y + input_shape1.x * input_shape1.y)) { + FLT4 result1 = + READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.x * input_shape0.y))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1); + } else { + FLT4 result2 = READ_IMAGE( + input2, smp_none, + (int2)((Y)*input_shape2.w + Z, (X - input_shape0.x * input_shape0.y - input_shape1.x * input_shape1.y))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2); + } + } else if (axis == 1) { + if (X < input_shape0.y) { + FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0); + } else if (X < (input_shape0.y + input_shape1.y)) { + FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1); + } else { + FLT4 result2 = + READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z, (X - input_shape0.y - input_shape1.y))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2); + } + } else if (axis == 2) { + if (Y < input_shape0.z) { + FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0); + } else if (Y < (input_shape0.z + input_shape0.z)) { + FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1); + } else { + FLT4 result2 = + READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z) * input_shape2.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2); + } } else { - FLT4 result2 = - READ_IMAGE(input2, smp_none, (int2)((Y)*input_channels.z + Z - input_channels.x - input_channels.y, (X))); - WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2); + if (Z < input_shape0.w) { + FLT4 result0 = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result0); + } else if (Z < (input_shape0.w + input_shape0.w)) { + FLT4 result1 = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result1); + } else { + FLT4 result2 = + READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z - input_shape0.w - input_shape1.w, (X))); + WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result2); + } } } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index ee8eba8ad3f..5c98f9dfe06 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -100,7 +100,7 @@ void BatchNormGetWorkGroup(const std::vector &global, std::vectorpush_back(z); } int BatchNormOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; + MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto input0_shape = in_tensors_[0]->shape(); @@ -136,12 +136,12 @@ kernel::LiteKernel *OpenCLBatchnormKernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: Convolution"; + MS_LOG(ERROR) << " Init kernel failed, name: Batchnorm "; delete kernel; return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 2203508a972..f76fc5259ee 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -33,10 +33,10 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) size_t im_dst_x, im_dst_y; if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height(); + im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); } else { im_dst_y = out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); + im_dst_x = out_tensors_[0]->Width() * out_tensors_[0]->Batch(); } size_t img_dtype = CL_FLOAT; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); @@ -51,20 +51,17 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) } int ConcatOpenCLKernel::Init() { if (in_tensors_[0]->shape().size() != 4) { - MS_LOG(ERROR) << "only support dim=4"; + MS_LOG(ERROR) << " only support dim = 4 "; return RET_ERROR; } auto param = reinterpret_cast(this->op_parameter_); - MS_LOG(DEBUG) << "concat at axis=: " << param->axis_; - if (param->axis_ != 0 && param->axis_ != 3) { - MS_LOG(ERROR) << "only support axis=0 or axis=3"; + MS_LOG(DEBUG) << " concat at axis=: " << param->axis_; + if (param->axis_ < 0 || param->axis_ > 3) { + MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 "; return RET_ERROR; } - if (param->axis_ == 0) { - return RET_OK; - } if (in_tensors_.size() == 2) { std::set build_options; std::string source = concat_source; @@ -94,33 +91,6 @@ int ConcatOpenCLKernel::Init() { int ConcatOpenCLKernel::ReSize() { return RET_OK; } -int ConcatOpenCLKernel::Run_axis0() { - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto allocator_ = ocl_runtime->GetAllocator(); - cl::CommandQueue *command_queue = ocl_runtime->GetDefaultCommandQueue(); - - for (auto &tensor : in_tensors_) { - auto buffer = static_cast(allocator_->GetBuffer(tensor->Data())); - ocl_runtime->MapBuffer(*buffer, CL_MAP_READ, tensor->Size(), command_queue, true); - } - for (auto &tensor : out_tensors_) { - auto buffer = static_cast(allocator_->GetBuffer(tensor->Data())); - ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true); - } - - memcpy(out_tensors_[0]->Data(), in_tensors_[0]->Data(), in_tensors_[0]->Size()); - memcpy(reinterpret_cast(out_tensors_[0]->Data()) + in_tensors_[0]->Size(), in_tensors_[1]->Data(), - in_tensors_[1]->Size()); - - for (auto tensors : {&in_tensors_, &out_tensors_}) { - for (auto &tensor : *tensors) { - auto buffer = static_cast(allocator_->GetBuffer(tensor->Data())); - ocl_runtime->UnmapBuffer(*buffer, tensor->Data()); - } - } - return RET_OK; -} - int ConcatGetBiggestDividerWithPriority(int number, int max_divider) { if (number % 8 == 0 && max_divider >= 8) { return number / 8; @@ -154,21 +124,19 @@ void ConcatGetWorkGroup(const std::vector &global, std::vector * local->push_back(z); } int ConcatOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; + MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); - if (param->axis_ == 0) { - return Run_axis0(); - } auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - auto input0_shape = in_tensors_[0]->shape(); - auto input1_shape = in_tensors_[1]->shape(); + auto input1_shape = in_tensors_[0]->shape(); + auto input2_shape = in_tensors_[1]->shape(); auto output_shape = out_tensors_[0]->shape(); - cl_int2 input0_shape2_ = {UP_DIV(input0_shape[3], C4NUM), UP_DIV(input1_shape[3], C4NUM)}; // change + cl_int4 input_shape1_ = {input1_shape[0], input1_shape[1], input1_shape[2], UP_DIV(input1_shape[3], C4NUM)}; + cl_int4 input_shape2_ = {input2_shape[0], input2_shape[1], input2_shape[2], UP_DIV(input2_shape[3], C4NUM)}; cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)}; - uint32_t OH = output_shape[1]; // N*H + uint32_t OH = output_shape[0] * output_shape[1]; // N*H uint32_t OW = output_shape[2]; uint32_t OC = UP_DIV(output_shape[3], C4NUM); @@ -182,23 +150,28 @@ int ConcatOpenCLKernel::Run() { ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->Data()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input0_shape2_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); } else if (in_tensors_.size() == 3) { - auto input2_shape = in_tensors_[2]->shape(); - cl_int3 input0_shape3_ = {UP_DIV(input0_shape[3], C4NUM), UP_DIV(input1_shape[3], C4NUM), - UP_DIV(input2_shape[3], C4NUM)}; + auto input3_shape = in_tensors_[2]->shape(); + cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->Data()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->Data()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, input0_shape3_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); } else if (in_tensors_.size() < 2) { - MS_LOG(ERROR) << " inputs must been >=2"; + MS_LOG(ERROR) << " input sizes must >= 2 "; return RET_ERROR; } else { - MS_LOG(ERROR) << "only support inputs<=3"; + MS_LOG(ERROR) << " only support inputs <= 3 "; return RET_ERROR; } ocl_runtime->RunKernel(kernel_, global, local, nullptr); @@ -213,12 +186,12 @@ kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: Convolution"; + MS_LOG(ERROR) << " Init kernel failed, name: Concat "; delete kernel; return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h index 0f1d1ab13de..a411bd6af61 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h @@ -37,8 +37,6 @@ class ConcatOpenCLKernel : public OpenCLKernel { int ReSize() override; - int Run_axis0(); - int Run() override; int GetImageSize(size_t idx, std::vector *img_size) override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc index be857a15f3c..6e8c76d84d5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc @@ -97,7 +97,7 @@ void SlcieGetWorkGroup(const std::vector &global, std::vector *l local->push_back(z); } int SliceOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; + MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto input_shape = in_tensors_[0]->shape(); @@ -131,12 +131,12 @@ kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: Convolution"; + MS_LOG(ERROR) << " Init kernel failed, name: Slice "; delete kernel; return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 93b67fba4ef..f6721304ffc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -94,7 +94,7 @@ int ToFormatOpenCLKernel::InitNHWCShape() { int ToFormatOpenCLKernel::ReSize() { return RET_OK; } int ToFormatOpenCLKernel::GetGlobalSize(size_t idx, std::vector *global_size) { - std::vector vec = {nhwc_shape_[1], nhwc_shape_[2], UP_DIV(nhwc_shape_[3], C4NUM)}; + std::vector vec = {nhwc_shape_[0] * nhwc_shape_[1], nhwc_shape_[2], UP_DIV(nhwc_shape_[3], C4NUM)}; *global_size = std::move(vec); return RET_OK; } @@ -107,13 +107,13 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size size_t im_dst_x, im_dst_y; std::vector shapex = out_tensors_[0]->shape(); if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { - int c = shapex[1]; - int h = shapex[2]; + int c = shapex[1] * shapex[2]; + int h = shapex[0]; int w = shapex[3]; im_dst_y = h * UP_DIV(c, C4NUM); im_dst_x = w; } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { - int h = shapex[1]; + int h = shapex[0] * shapex[1]; int w = shapex[2]; int c = shapex[3]; im_dst_x = w * UP_DIV(c, C4NUM); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc index ab95c9e57ef..7ab29a333a8 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc @@ -32,13 +32,6 @@ class TestBatchnormOpenCLfp16 : public mindspore::CommonTest { TestBatchnormOpenCLfp16() {} }; -template -void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bound) { - for (size_t i = 0; i < size; i++) { - T abs = fabs(output_data[i] - correct_data[i]); - ASSERT_LE(abs, err_bound); - } -} TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { MS_LOG(INFO) << "begin test"; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); @@ -46,7 +39,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { ocl_runtime->Init(); auto allocator = ocl_runtime->GetAllocator(); - MS_LOG(INFO) << "Read tensors from .bin"; + MS_LOG(INFO) << " Read tensors from .bin "; std::vector input_shape = {1, 256, 256, 48}; std::vector output_shape = {1, 256, 256, 48}; auto data_type = kNumberTypeFloat32; @@ -59,7 +52,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { std::string var_path = "./test_data/batchnorm_varfp16.bin"; std::string offset_path = "./test_data/batchnorm_offsetfp16.bin"; std::string scale_path = "./test_data/batchnorm_scalefp16.bin"; - std::string output_path = "./test_data/batchnorm_out_datafp16.bin"; + std::string output_path = "./test_data/batchnorm_correctdatafp16.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); size_t mean_size, var_size, scale_size, offset_size; @@ -68,7 +61,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { auto scale_data = reinterpret_cast(mindspore::lite::ReadFile(scale_path.c_str(), &scale_size)); auto offset_data = reinterpret_cast(mindspore::lite::ReadFile(offset_path.c_str(), &offset_size)); - MS_LOG(INFO) << "construct tensors"; + MS_LOG(INFO) << " construct tensors "; lite::tensor::Tensor *tensor_data = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); lite::tensor::Tensor *tensor_mean = @@ -81,13 +74,13 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { new (std::nothrow) lite::tensor::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr || tensor_offset == nullptr) { - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; return; } auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); if (output_tensor == nullptr) { - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; delete tensor_data; delete tensor_mean; delete tensor_var; @@ -98,10 +91,10 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { std::vector inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var}; std::vector outputs{output_tensor}; - MS_LOG(INFO) << "initialize tensors"; + MS_LOG(INFO) << " initialize tensors "; auto param = new (std::nothrow) BatchNormParameter(); if (param == nullptr) { - MS_LOG(INFO) << "new BatchNormParameter failed"; + MS_LOG(INFO) << " new BatchNormParameter failed "; for (auto tensor : outputs) { delete tensor; } @@ -111,7 +104,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { auto *batchnorm_kernel = new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast(param), inputs, outputs); if (batchnorm_kernel == nullptr) { - MS_LOG(INFO) << "new kernel::BatchNorm_kernel failed"; + MS_LOG(INFO) << " new kernel::BatchNorm_kernel failed "; for (auto tensor : outputs) { delete tensor; } @@ -125,11 +118,11 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { input_tensor->MallocData(allocator); } - MS_LOG(INFO) << "initialize sub_graph"; + MS_LOG(INFO) << " initialize sub_graph "; std::vector kernels{batchnorm_kernel}; auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (sub_graph == nullptr) { - MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; for (auto tensor : outputs) { delete tensor; } @@ -138,7 +131,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { return; } sub_graph->Init(); - MS_LOG(INFO) << "init tensors"; + MS_LOG(INFO) << " init tensors "; memcpy(inputs[0]->Data(), input_data, input_size); memcpy(inputs[1]->Data(), scale_data, scale_size); memcpy(inputs[2]->Data(), offset_data, offset_size); @@ -148,7 +141,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { sub_graph->Run(); auto *output_data_gpu = reinterpret_cast(output_tensor->Data()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); + CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01); for (auto tensor : inputs) { delete tensor; } @@ -158,15 +151,14 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { delete param; delete batchnorm_kernel; delete sub_graph; - lite::opencl::OpenCLRuntime::DeleteInstance(); } TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { - MS_LOG(INFO) << "begin test"; + MS_LOG(INFO) << " begin test "; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->Init(); auto allocator = ocl_runtime->GetAllocator(); - MS_LOG(INFO) << "Read tensors from .bin"; + MS_LOG(INFO) << " Read tensors from .bin "; std::vector input_shape = {1, 256, 256, 47}; std::vector output_shape = {1, 256, 256, 47}; auto data_type = kNumberTypeFloat32; @@ -188,7 +180,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { auto scale_data = reinterpret_cast(mindspore::lite::ReadFile(scale_path.c_str(), &scale_size)); auto offset_data = reinterpret_cast(mindspore::lite::ReadFile(offset_path.c_str(), &offset_size)); - MS_LOG(INFO) << "construct tensors"; + MS_LOG(INFO) << " construct tensors "; lite::tensor::Tensor *tensor_data = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); lite::tensor::Tensor *tensor_mean = @@ -201,13 +193,13 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { new (std::nothrow) lite::tensor::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr || tensor_offset == nullptr) { - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; return; } auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); if (output_tensor == nullptr) { - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; delete tensor_data; delete tensor_mean; delete tensor_var; @@ -218,10 +210,10 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { std::vector inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var}; std::vector outputs{output_tensor}; - MS_LOG(INFO) << "initialize tensors"; + MS_LOG(INFO) << " initialize tensors "; auto param = new (std::nothrow) BatchNormParameter(); if (param == nullptr) { - MS_LOG(INFO) << "new BatchNormParameter failed"; + MS_LOG(INFO) << " new BatchNormParameter failed "; for (auto tensor : outputs) { delete tensor; } @@ -231,7 +223,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { auto *batchnorm_kernel = new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast(param), inputs, outputs); if (batchnorm_kernel == nullptr) { - MS_LOG(INFO) << "new kernel::BatchNorm_kernel failed"; + MS_LOG(INFO) << " new kernel::BatchNorm_kernel failed "; for (auto tensor : outputs) { delete tensor; } @@ -245,11 +237,11 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { input_tensor->MallocData(allocator); } - MS_LOG(INFO) << "initialize sub_graph"; + MS_LOG(INFO) << " initialize sub_graph "; std::vector kernels{batchnorm_kernel}; auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (sub_graph == nullptr) { - MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; for (auto tensor : outputs) { delete tensor; } @@ -258,7 +250,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { return; } sub_graph->Init(); - MS_LOG(INFO) << "init tensors"; + MS_LOG(INFO) << " init tensors "; memcpy(inputs[0]->Data(), input_data, input_size); memcpy(inputs[1]->Data(), scale_data, scale_size); memcpy(inputs[2]->Data(), offset_data, offset_size); @@ -268,7 +260,7 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { sub_graph->Run(); auto *output_data_gpu = reinterpret_cast(output_tensor->Data()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); + CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); for (auto tensor : inputs) { delete tensor; } @@ -278,6 +270,5 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { delete param; delete batchnorm_kernel; delete sub_graph; - lite::opencl::OpenCLRuntime::DeleteInstance(); } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc index 08f6223de27..c72dfa67202 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc @@ -18,70 +18,10 @@ #include "utils/log_adapter.h" #include "common/common_test.h" #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" +#include "mindspore/lite/src/common/file_utils.h" #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h" -template -void ConcatComputeByCPU_2input_dim4_axis3(const T *input0, const T *input1, T *output, std::vector input_shape0, - std::vector input_shape1, std::vector output_shape, - const int axis) { - int postion, index0 = 0, index1 = 0; - for (int i = 0; i < output_shape[0]; i++) { - for (int j = 0; j < output_shape[1]; j++) { - for (int k = 0; k < output_shape[2]; k++) { - postion = i * output_shape[1] * output_shape[2] * output_shape[3] + j * output_shape[2] * output_shape[3] + - k * output_shape[3]; - for (int w = 0; w < output_shape[3]; w++) { - if (w < input_shape0[3] + input_shape1[3]) { - output[postion++] = (w < input_shape0[3]) ? input0[index0++] : input1[index1++]; - } else { - for (int ind = input_shape0[3] + input_shape1[3]; ind < output_shape[3]; ind++) { - output[postion++] = 0; - } - } - } - } - } - } -} -template -void ConcatComputeByCPU_3input_dim4_axis3(T *input0, T *input1, T *input2, T *output, std::vector input_shape0, - std::vector input_shape1, std::vector input_shape2, - std::vector output_shape, const int axis) { - int postion, index0 = 0, index1 = 0, index2 = 0; - for (int i = 0; i < output_shape[0]; i++) { - for (int j = 0; j < output_shape[1]; j++) { - for (int k = 0; k < output_shape[2]; k++) { - postion = i * output_shape[1] * output_shape[2] * output_shape[3] + j * output_shape[2] * output_shape[3] + - k * output_shape[3]; - for (int w = 0; w < output_shape[3]; w++) { - if (w < input_shape0[3]) { - int align = UP_DIV(input_shape0[3], 4) * 4; - index0 = i * input_shape0[1] * input_shape0[2] * align + j * input_shape0[2] * align + k * align + w; - output[postion++] = input0[index0]; - } else if (w >= input_shape0[3] && w < (input_shape0[3] + input_shape1[3])) { - int align = UP_DIV(input_shape1[3], 4) * 4; - index1 = i * input_shape1[1] * input_shape1[2] * align + j * input_shape1[2] * align + k * align + w - - input_shape0[3]; - output[postion++] = input1[index1]; - } else if ((input_shape0[3] + input_shape1[3]) <= w && - w < (input_shape0[3] + input_shape1[3] + input_shape2[3])) { - int align = UP_DIV(input_shape2[3], 4) * 4; - index2 = i * input_shape2[1] * input_shape2[2] * align + j * input_shape2[2] * align + k * align + w - - input_shape0[3] - input_shape1[3]; - output[postion++] = input2[index2]; - } else { - for (int ind = input_shape0[3] + input_shape1[3] + input_shape2[3]; ind < output_shape[3]; ind++) { - output[postion++] = 0; - } - break; - } - } - } - } - } -} - namespace mindspore { class TestConcatOpenCLfp32 : public mindspore::CommonTest { public: @@ -100,17 +40,29 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou } } TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { - MS_LOG(INFO) << "begin test"; + MS_LOG(INFO) << " begin test "; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->SetFp16Enable(true); ocl_runtime->Init(); auto allocator = ocl_runtime->GetAllocator(); - MS_LOG(INFO) << "init tensors"; - constexpr int INPUT_NUM = 3; - std::array, INPUT_NUM> input_shapes = { - std::vector{1, 16, 256, 80}, std::vector{1, 16, 256, 80}, std::vector{1, 16, 256, 80}}; - std::vector output_shape = {1, 16, 256, 240}; + // get the input from .bin + size_t input1_size, input2_size, input3_size, output_size; + std::string input1Ppath = "./test_data/concatfp16_input1.bin"; + std::string input2Ppath = "./test_data/concatfp16_input2.bin"; + std::string input3Ppath = "./test_data/concatfp16_input3.bin"; + std::string correctOutputPath = "./test_data/concatfp16_output.bin"; + auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); + auto input_data2 = reinterpret_cast(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); + auto input_data3 = reinterpret_cast(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); + auto correctOutput = + reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); + + MS_LOG(INFO) << " init tensors "; + constexpr int INPUT_NUM = 2; + std::array, INPUT_NUM> input_shapes = {std::vector{1, 19, 19, 96}, + std::vector{1, 19, 19, 96}}; + std::vector output_shape = {2, 19, 19, 96}; auto data_type = kNumberTypeFloat16; auto tensor_type = schema::NodeType_ValueNode; std::vector inputs; @@ -118,26 +70,26 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { auto input_temp = new (std::nothrow) lite::tensor::Tensor(data_type, shape, schema::Format_NHWC4, tensor_type); inputs.push_back(input_temp); if (input_temp == nullptr) { - MS_LOG(INFO) << "new input_tensor failed"; + MS_LOG(INFO) << " new input_tensor failed "; return; } } auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); if (output_tensor == nullptr) { - MS_LOG(INFO) << "new output_tensor failed"; + MS_LOG(INFO) << " new output_tensor failed "; for (auto tensor : inputs) { delete tensor; } return; } std::vector outputs{output_tensor}; - MS_LOG(INFO) << "input_shapes size=: " << input_shapes.size(); + MS_LOG(INFO) << " input_shapes size =: " << input_shapes.size(); - MS_LOG(INFO) << "initialize tensors"; + MS_LOG(INFO) << " initialize tensors "; auto param = new (std::nothrow) ConcatParameter(); if (param == nullptr) { - MS_LOG(INFO) << "new ConcatParameter failed"; + MS_LOG(INFO) << " new ConcatParameter failed "; for (auto tensor : inputs) { delete tensor; } @@ -146,11 +98,11 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { } return; } - param->axis_ = 3; + param->axis_ = 0; auto *concat_kernel = new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast(param), inputs, outputs); if (concat_kernel == nullptr) { - MS_LOG(INFO) << "new kernel::ConcatOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::ConcatOpenCLKernel failed "; for (auto tensor : inputs) { delete tensor; } @@ -165,12 +117,11 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { for (auto &input_tensor : inputs) { input_tensor->MallocData(allocator); } - - MS_LOG(INFO) << "initialize sub_graph"; + MS_LOG(INFO) << " initialize sub_graph "; std::vector kernels{concat_kernel}; auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (sub_graph == nullptr) { - MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; for (auto tensor : inputs) { delete tensor; } @@ -182,33 +133,22 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { return; } sub_graph->Init(); - unsigned int seed = 123; - MS_LOG(INFO) << "initialize input data"; - for (auto &input_tensor : inputs) { - auto input_data = reinterpret_cast(input_tensor->Data()); - for (int i = 0; i < input_tensor->ElementsNum(); ++i) { - input_data[i] = static_cast(rand_r(&seed) % 10 + 1); - } - } - - // compute the result for CPU - auto *input_data0 = reinterpret_cast(inputs[0]->Data()); - auto *input_data1 = reinterpret_cast(inputs[1]->Data()); - std::vector output_data_cpu(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]); + MS_LOG(INFO) << " initialize input data "; if (inputs.size() == 2) { - ConcatComputeByCPU_2input_dim4_axis3(input_data0, input_data1, output_data_cpu.data(), input_shapes[0], - input_shapes[1], output_shape, param->axis_); - } - if (inputs.size() == 3) { - auto *input_data2 = reinterpret_cast(inputs[2]->Data()); - ConcatComputeByCPU_3input_dim4_axis3(input_data0, input_data1, input_data2, output_data_cpu.data(), input_shapes[0], - input_shapes[1], input_shapes[2], output_shape, param->axis_); + memcpy(inputs[0]->Data(), input_data1, input1_size); + memcpy(inputs[1]->Data(), input_data2, input2_size); + } else if (inputs.size() == 3) { + memcpy(inputs[0]->Data(), input_data1, input1_size); + memcpy(inputs[1]->Data(), input_data2, input2_size); + memcpy(inputs[2]->Data(), input_data3, input3_size); + } else { + MS_LOG(ERROR) << " input size must be 2 or 3"; } std::cout << "==================output data================" << std::endl; sub_graph->Run(); auto *output_data_gpu = reinterpret_cast(output_tensor->Data()); - CompareOutputData1(output_data_gpu, output_data_cpu.data(), output_tensor->ElementsNum(), 0.00001); + CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); for (auto tensor : inputs) { delete tensor; } @@ -218,47 +158,57 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { delete param; delete concat_kernel; delete sub_graph; - lite::opencl::OpenCLRuntime::DeleteInstance(); } TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { - MS_LOG(INFO) << "begin test"; + MS_LOG(INFO) << " begin test "; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->Init(); auto allocator = ocl_runtime->GetAllocator(); - MS_LOG(INFO) << "init tensors"; + // get the input from .bin + size_t input1_size, input2_size, input3_size, output_size; + std::string input1Ppath = "./test_data/concat_input1.bin"; + std::string input2Ppath = "./test_data/concat_input2.bin"; + std::string input3Ppath = "./test_data/concat_input3.bin"; + std::string correctOutputPath = "./test_data/concat_output.bin"; + auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); + auto input_data2 = reinterpret_cast(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); + auto input_data3 = reinterpret_cast(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); + auto correctOutput = reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); + + MS_LOG(INFO) << " init tensors "; constexpr int INPUT_NUM = 3; std::array, INPUT_NUM> input_shapes = { std::vector{1, 16, 256, 80}, std::vector{1, 16, 256, 80}, std::vector{1, 16, 256, 80}}; - std::vector output_shape = {1, 16, 256, 240}; + std::vector output_shape = {1, 48, 256, 80}; auto data_type = kNumberTypeFloat32; auto tensor_type = schema::NodeType_ValueNode; std::vector inputs; for (auto &shape : input_shapes) { - auto input_temp = new (std::nothrow) lite::tensor::Tensor(data_type, shape, schema::Format_NHWC4, tensor_type); + auto input_temp = new (std::nothrow) lite::tensor::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); inputs.push_back(input_temp); if (input_temp == nullptr) { - MS_LOG(INFO) << "new input_tensor failed"; + MS_LOG(INFO) << " new input_tensor failed "; return; } } auto *output_tensor = - new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); + new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); if (output_tensor == nullptr) { - MS_LOG(INFO) << "new output_tensor failed"; + MS_LOG(INFO) << " new output_tensor failed "; for (auto tensor : inputs) { delete tensor; } return; } std::vector outputs{output_tensor}; - MS_LOG(INFO) << "input_shapes size=: " << input_shapes.size(); + MS_LOG(INFO) << " input_shapes size=: " << input_shapes.size(); - MS_LOG(INFO) << "initialize tensors"; + MS_LOG(INFO) << " initialize tensors "; auto param = new (std::nothrow) ConcatParameter(); if (param == nullptr) { - MS_LOG(INFO) << "new ConcatParameter failed"; + MS_LOG(INFO) << " new ConcatParameter failed "; for (auto tensor : inputs) { delete tensor; } @@ -267,11 +217,11 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { } return; } - param->axis_ = 3; + param->axis_ = 1; auto *concat_kernel = new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast(param), inputs, outputs); if (concat_kernel == nullptr) { - MS_LOG(INFO) << "new kernel::ConcatOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::ConcatOpenCLKernel failed "; for (auto tensor : inputs) { delete tensor; } @@ -287,11 +237,11 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { input_tensor->MallocData(allocator); } - MS_LOG(INFO) << "initialize sub_graph"; + MS_LOG(INFO) << " initialize sub_graph "; std::vector kernels{concat_kernel}; auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (sub_graph == nullptr) { - MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; for (auto tensor : inputs) { delete tensor; } @@ -303,33 +253,22 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { return; } sub_graph->Init(); - unsigned int seed = 123; - MS_LOG(INFO) << "initialize input data"; - for (auto &input_tensor : inputs) { - auto input_data = reinterpret_cast(input_tensor->Data()); - for (int i = 0; i < input_tensor->ElementsNum(); ++i) { - input_data[i] = static_cast(rand_r(&seed) % 10 + 1); - } - } - - // compute the result for CPU - auto *input_data0 = reinterpret_cast(inputs[0]->Data()); - auto *input_data1 = reinterpret_cast(inputs[1]->Data()); - std::vector output_data_cpu(output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]); + MS_LOG(INFO) << " initialize input data "; if (inputs.size() == 2) { - ConcatComputeByCPU_2input_dim4_axis3(input_data0, input_data1, output_data_cpu.data(), input_shapes[0], - input_shapes[1], output_shape, param->axis_); - } - if (inputs.size() == 3) { - auto *input_data2 = reinterpret_cast(inputs[2]->Data()); - ConcatComputeByCPU_3input_dim4_axis3(input_data0, input_data1, input_data2, output_data_cpu.data(), input_shapes[0], - input_shapes[1], input_shapes[2], output_shape, param->axis_); + memcpy(inputs[0]->Data(), input_data1, input1_size); + memcpy(inputs[1]->Data(), input_data2, input2_size); + } else if (inputs.size() == 3) { + memcpy(inputs[0]->Data(), input_data1, input1_size); + memcpy(inputs[1]->Data(), input_data2, input2_size); + memcpy(inputs[2]->Data(), input_data3, input3_size); + } else { + MS_LOG(ERROR) << " input size must be 2 or 3 "; } std::cout << "==================output data================" << std::endl; sub_graph->Run(); auto *output_data_gpu = reinterpret_cast(output_tensor->Data()); - CompareOutputData1(output_data_gpu, output_data_cpu.data(), output_tensor->ElementsNum(), 0.00001); + CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); for (auto tensor : inputs) { delete tensor; } @@ -339,6 +278,5 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { delete param; delete concat_kernel; delete sub_graph; - lite::opencl::OpenCLRuntime::DeleteInstance(); } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index 33ff054d32b..3faefcd9350 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -41,44 +41,43 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou } TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { - MS_LOG(INFO) << "begin test"; + MS_LOG(INFO) << " begin test "; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->Init(); auto allocator = ocl_runtime->GetAllocator(); - MS_LOG(INFO) << "Read tensors from .bin"; - std::vector input_shape = {1, 256, 256, 48}; - std::vector output_shape = {1, 255, 255, 15}; - std::vector begin = {0, 1, 1, 7}; - std::vector size = {1, 255, 255, 15}; + MS_LOG(INFO) << " Read tensors from .bin "; + std::vector input_shape = {1, 19, 19, 96}; + std::vector output_shape = {1, 10, 10, 13}; + std::vector begin = {0, 2, 3, 4}; + std::vector size = {1, 10, 10, 13}; auto data_type = kNumberTypeFloat32; auto tensor_type = schema::NodeType_ValueNode; // get the input from .bin size_t input_size, output_size; - std::string input_path = "./test_data/in_datafp32.bin"; - std::string output_path = "./test_data/out_datafp32.bin"; + std::string input_path = "./test_data/in_slicefp32.bin"; + std::string output_path = "./test_data/out_slicefp32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - - MS_LOG(INFO) << "construct tensors"; + MS_LOG(INFO) << " construct tensors "; lite::tensor::Tensor *tensor_data = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); if (tensor_data == nullptr) { - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; return; } auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); if (output_tensor == nullptr) { delete tensor_data; - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; return; } std::vector inputs = {tensor_data}; std::vector outputs = {output_tensor}; - MS_LOG(INFO) << "setting SliceParameter"; + MS_LOG(INFO) << "setting SliceParameter "; auto param = new (std::nothrow) SliceParameter(); if (param == nullptr) { for (auto tensor : inputs) { @@ -87,7 +86,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { for (auto tensor : outputs) { delete tensor; } - MS_LOG(INFO) << "new SliceParameter failed"; + MS_LOG(INFO) << "new SliceParameter failed "; return; } for (int i = 0; i < input_shape.size(); i++) { @@ -105,7 +104,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { delete tensor; } delete param; - MS_LOG(INFO) << "new kernel::slice_kernel failed"; + MS_LOG(INFO) << "new kernel::slice_kernel failed "; return; } slice_kernel->Init(); @@ -115,7 +114,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { input_tensor->MallocData(allocator); } - MS_LOG(INFO) << "initialize sub_graph"; + MS_LOG(INFO) << " initialize sub_graph "; std::vector kernels{slice_kernel}; auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (sub_graph == nullptr) { @@ -127,12 +126,12 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { } delete param; delete slice_kernel; - MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; return; } sub_graph->Init(); - MS_LOG(INFO) << "init tensors"; + MS_LOG(INFO) << " init tensors "; memcpy(inputs[0]->Data(), input_data, input_size); std::cout << "==================output data================" << std::endl; @@ -148,16 +147,15 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { } delete slice_kernel; delete sub_graph; - lite::opencl::OpenCLRuntime::DeleteInstance(); } TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { - MS_LOG(INFO) << "begin test"; + MS_LOG(INFO) << " begin test "; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->SetFp16Enable(true); ocl_runtime->Init(); auto allocator = ocl_runtime->GetAllocator(); - MS_LOG(INFO) << "Read tensors from .bin"; + MS_LOG(INFO) << " Read tensors from .bin "; std::vector input_shape = {1, 256, 256, 48}; std::vector output_shape = {1, 255, 255, 15}; std::vector begin = {0, 1, 1, 7}; @@ -172,24 +170,24 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - MS_LOG(INFO) << "construct tensors"; + MS_LOG(INFO) << " construct tensors "; lite::tensor::Tensor *tensor_data = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); if (tensor_data == nullptr) { - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; return; } auto *output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); if (output_tensor == nullptr) { delete tensor_data; - MS_LOG(INFO) << "init tensor failed"; + MS_LOG(INFO) << " init tensor failed "; return; } std::vector inputs = {tensor_data}; std::vector outputs = {output_tensor}; - MS_LOG(INFO) << "setting SliceParameter"; + MS_LOG(INFO) << " setting SliceParameter "; auto param = new (std::nothrow) SliceParameter(); if (param == nullptr) { for (auto tensor : inputs) { @@ -198,10 +196,10 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { for (auto tensor : outputs) { delete tensor; } - MS_LOG(INFO) << "new SliceParameter failed"; + MS_LOG(INFO) << " new SliceParameter failed "; return; } - for (int i = 0; i < 4; i++) { + for (int i = 0; i < input_shape.size(); i++) { param->begin_[i] = begin[i]; param->size_[i] = size[i]; } @@ -216,7 +214,7 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { delete tensor; } delete param; - MS_LOG(INFO) << "new kernel::slice_kernel failed"; + MS_LOG(INFO) << " new kernel::slice_kernel failed "; return; } slice_kernel->Init(); @@ -226,7 +224,7 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { input_tensor->MallocData(allocator); } - MS_LOG(INFO) << "initialize sub_graph"; + MS_LOG(INFO) << " initialize sub_graph "; std::vector kernels{slice_kernel}; auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); if (sub_graph == nullptr) { @@ -238,12 +236,12 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { } delete param; delete slice_kernel; - MS_LOG(INFO) << "new kernel::SubGraphOpenCLKernel failed"; + MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; return; } sub_graph->Init(); - MS_LOG(INFO) << "init tensors"; + MS_LOG(INFO) << " init tensors "; memcpy(inputs[0]->Data(), input_data, input_size); std::cout << "==================output data================" << std::endl; @@ -259,6 +257,5 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { } delete slice_kernel; delete sub_graph; - lite::opencl::OpenCLRuntime::DeleteInstance(); } } // namespace mindspore