From 4e7d7458cedc83265f5038893bcc6ffa0469fe58 Mon Sep 17 00:00:00 2001 From: Pengyongrong Date: Fri, 27 Nov 2020 02:17:11 -0800 Subject: [PATCH] solve gather ops issue_id=1276SX --- .../runtime/kernel/opencl/kernel/gather.cc | 54 ++++++++++-- .../src/runtime/kernel/opencl/kernel/gather.h | 4 + .../runtime/kernel/opencl/kernel/to_format.cc | 5 +- .../ut/src/runtime/kernel/opencl/common.cc | 2 +- .../src/runtime/kernel/opencl/gather_tests.cc | 85 +++++++++++++++++++ 5 files changed, 138 insertions(+), 12 deletions(-) /* Review summary: this patch lets the OpenCL Gather kernel accept its indices (input 1) as a non-const tensor that is converted inside Run(), while const indices still go through InitWeights() in Prepare(). It also admits Int32 inputs in ToFormat CheckSpecs and in the TestMain type check, and adds tests that pass the indices as VAR Int32 tensors. */ diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index 40ac54c80da..1fe643a65f4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -39,12 +39,11 @@ int GatherOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size(); return RET_ERROR; } - - if (in_tensors_.at(1)->category() == lite::Tensor::VAR) { - MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight."; + enable_fp16_ = ocl_runtime_->GetFp16Enable(); /* Non-const indices are rejected only when fp16 is enabled; the old blanket rejection of VAR indices is removed. */ + if (!in_tensors_.at(1)->IsConst() && enable_fp16_) { + MS_LOG(ERROR) << "GatherOpenCLKernel Unsupportted intensor1 = tensor and datatype = fp16 "; return RET_ERROR; } - int input_ndim = in_tensors_.front()->shape().size(); if (input_ndim < 0 || input_ndim > 4) { MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D."; @@ -59,7 +58,7 @@ int GatherOpenCLKernel::CheckSpecs() { TypeId data_type = in_tensors_.at(1)->data_type(); if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) { - MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor."; + MS_LOG(ERROR) << "GatherOpenCLKernel only supports Int32/Int64/Float32/Float16 indices Tensor."; return
RET_ERROR; } @@ -107,17 +106,51 @@ int GatherOpenCLKernel::Prepare() { ocl_runtime_->LoadSource(program_name, gather_source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - - int ret = InitWeights(); - if (ret != RET_OK) { - return ret; + if (!in_tensors_.at(1)->IsConst()) { /* Non-const indices: remember that fact so InitWeights() is skipped below and Run() performs the conversion instead. */ + intensor1_is_tensor = true; } + + if (!intensor1_is_tensor) { + int ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } + } + SetGlobalLocal(); SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } +int GatherOpenCLKernel::ConvertTensorToweight() { /* Copies the Int32 indices of in_tensors_[1] into a buffer obtained from the runtime allocator and stores it in indices_data_. The source is read once every stride_w elements, where stride_w = RowPitch() / sizeof(cl_int). Returns RET_OK on success, RET_ERROR when the buffer pointer is null or the index type is not Int32. NOTE(review): Run() calls this on every invocation and discards the result. NOTE(review): every call performs a fresh Malloc and no release of the previous indices_data_ is visible in this patch - TODO confirm where it is freed. */ + auto allocator = ocl_runtime_->GetAllocator(); + GpuTensorInfo img_info(in_tensors_[1]); + size_t dtype = sizeof(cl_int); + stride_w = img_info.RowPitch() / dtype; + auto indices_tensor = in_tensors_.at(1); + auto indices_num = indices_tensor->ElementsNum(); + indices_data_ = reinterpret_cast(allocator->Malloc(sizeof(int32_t) * indices_num)); /* NOTE(review): reinterpret_cast appears without a target type here and in the copy loop below; the template argument was presumably lost when this patch text was extracted - confirm against the repository. */ + allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); /* NOTE(review): MapBuffer receives indices_data_ before the null check that follows. */ + if (indices_data_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + return RET_ERROR; + } + auto data_type = indices_tensor->data_type(); + auto data = indices_tensor->data_c(); + if (data_type == kNumberTypeInt32) { + for (int i = 0; i < indices_num; i++) { + indices_data_[i] = reinterpret_cast(data)[i * stride_w]; + } + } else { /* NOTE(review): this branch returns while the buffer is still mapped; UnmapBuffer is only reached on the success path. */ + MS_LOG(ERROR) << "Gather Only supported The DataType Of Intensor1 is Int32 " + << " But Your Type is :" << data_type; + return RET_ERROR; + } + allocator->UnmapBuffer(indices_data_); + return RET_OK; +} + int GatherOpenCLKernel::InitWeights() { auto indices_tensor = in_tensors_.at(1); auto indices_num = indices_tensor->ElementsNum(); @@ -152,6 +185,9 @@ int GatherOpenCLKernel::InitWeights() { int GatherOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; + if (intensor1_is_tensor) { + ConvertTensorToweight(); + } ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index 6b1524cbdb6..1578e8eba06 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -39,6 +39,7 @@ class GatherOpenCLKernel : public OpenCLKernel { void SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } + int ConvertTensorToweight(); protected: int UpdateWeights(); @@ -46,6 +47,9 @@ class GatherOpenCLKernel : public OpenCLKernel { private: int32_t *indices_data_{nullptr}; int axis_ = {0}; + bool intensor1_is_tensor{false}; + bool enable_fp16_{false}; + cl_int stride_w{1}; }; } // namespace mindspore::kernel #endif diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 17c468cb522..363341fea48 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -34,7 +34,7 @@ namespace mindspore::kernel { int ToFormatOpenCLKernel::CheckSpecs() { auto data_type = in_tensors_.front()->data_type(); - if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) { + if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16 && data_type != kNumberTypeInt32) { MS_LOG(ERROR) << "Unsupported data type " << data_type; return RET_ERROR; } @@ -61,7 +61,8 @@ void ToFormatOpenCLKernel::SetGlobalLocal() { } int ToFormatOpenCLKernel::Prepare() { - std::map dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}}; + std::map dtype_str{ +
{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}, {kNumberTypeInt32, "float"}}; /* Int32 is mapped to the same kernel-name suffix as Float32. NOTE(review): presumably this relies on both types being 32 bits wide - confirm against the OpenCL kernel source. */ std::string kernel_name; if (out_mem_type_ == MemType::IMG) { kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()]; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc index 279d7699ba8..68b84096707 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc @@ -68,7 +68,7 @@ void TestMain(const std::vector &input_infos, std::tupleMutableData(), input_data, tensor->Size()); } } else { - EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32); + EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32); subgraph_inputs.push_back(tensor); subgraph_inputs_data[tensor] = reinterpret_cast(input_data); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc index 4db30fa9205..ba04555630f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc @@ -46,6 +46,22 @@ TEST_F(TestOpenCL_Gather, Axis0) { } } +TEST_F(TestOpenCL_Gather, Axis0_Tensor) { /* Axis-0 gather on a 10-element input with indices {1, 3} passed as a VAR Int32 tensor; only fp16_enable = false is exercised. */ + int axis = 0; + std::vector input_shape = {10}; + std::vector indices_shape = {2}; + std::vector output_shape = {2}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + int32_t indices[] = {1, 3}; + float output_data[] = {1, 3}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ?
1e-3 : 1e-9); + } +} + TEST_F(TestOpenCL_Gather, Axis1) { int axis = 1; std::vector input_shape = {1, 5, 4, 4}; @@ -75,6 +91,35 @@ TEST_F(TestOpenCL_Gather, Axis1) { } } +TEST_F(TestOpenCL_Gather, Axis1_intensor1) { /* Axis-1 gather on a {1, 5, 4, 4} input with the indices passed as a VAR tensor. */ + int axis = 1; + std::vector input_shape = {1, 5, 4, 4}; + std::vector indices_shape = {2}; + std::vector output_shape = {1, 2, 4, 4}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; + float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + + int32_t indices_int32[] = {1, 3}; + int64_t indices_int64[] = {1, 3}; + float32_t indices_fp32[] = {1, 3}; + float16_t indices_fp16[] = {1, 3}; + TypeId data_types[] = {kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat16}; + void *indices_datas[] = {indices_int32, indices_int64, indices_fp32, indices_fp16}; + + for (int i = 0; i < 1; ++i) { /* NOTE(review): the bound i < 1 means only entry 0 (Int32) of data_types / indices_datas is exercised; the Int64, Float32 and Float16 index arrays are prepared but never run. */ + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain( + {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ?
1e-3 : 1e-9); + } + } +} + TEST_F(TestOpenCL_Gather, Axis2) { int axis = 2; std::vector input_shape = {1, 5, 4, 4}; @@ -96,6 +141,26 @@ TEST_F(TestOpenCL_Gather, Axis2) { } } +TEST_F(TestOpenCL_Gather, Axis2_intensor1) { /* Axis-2 gather on a {1, 5, 4, 4} input with indices {1, 3} passed as a VAR Int32 tensor; only fp16_enable = false is exercised. */ + int axis = 2; + std::vector input_shape = {1, 5, 4, 4}; + std::vector indices_shape = {2}; + std::vector output_shape = {1, 5, 2, 4}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; + int32_t indices[] = {1, 3}; + float output_data[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39, + 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, + {output_shape, output_data}, param, fp16_enable); + } +} + TEST_F(TestOpenCL_Gather, Axis3) { int axis = 3; std::vector input_shape = {1, 5, 4, 4}; @@ -117,4 +182,24 @@ TEST_F(TestOpenCL_Gather, Axis3) { } } +TEST_F(TestOpenCL_Gather, Axis3_intensor1) { /* Axis-3 gather on a {1, 5, 4, 4} input with indices {1, 3} passed as a VAR Int32 tensor; only fp16_enable = false is exercised. */ + int axis = 3; + std::vector input_shape = {1, 5, 4, 4}; + std::vector indices_shape = {2}; + std::vector output_shape = {1, 5, 4, 2}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; + int32_t indices[] = {1, 3}; + float output_data[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, + 41, 43, 45, 47, 49, 51, 53,
55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, + {output_shape, output_data}, param, fp16_enable); + } +} + } // namespace mindspore::lite::opencl::test