From 4e7d7458cedc83265f5038893bcc6ffa0469fe58 Mon Sep 17 00:00:00 2001
From: Pengyongrong <pengyongrong@huawei.com>
Date: Fri, 27 Nov 2020 02:17:11 -0800
Subject: [PATCH] solve gather ops issue_id=1276SX

---
 .../runtime/kernel/opencl/kernel/gather.cc    | 54 ++++++++++--
 .../src/runtime/kernel/opencl/kernel/gather.h |  4 +
 .../runtime/kernel/opencl/kernel/to_format.cc |  5 +-
 .../ut/src/runtime/kernel/opencl/common.cc    |  2 +-
 .../src/runtime/kernel/opencl/gather_tests.cc | 85 +++++++++++++++++++
 5 files changed, 138 insertions(+), 12 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
index 40ac54c80da..1fe643a65f4 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
@@ -39,12 +39,11 @@ int GatherOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size();
     return RET_ERROR;
   }
-
-  if (in_tensors_.at(1)->category() == lite::Tensor::VAR) {
-    MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight.";
+  enable_fp16_ = ocl_runtime_->GetFp16Enable();  // runtime (non-const) indices: Int32 only, and not under fp16
+  if (!in_tensors_.at(1)->IsConst() && (enable_fp16_ || in_tensors_.at(1)->data_type() != kNumberTypeInt32)) {
+    MS_LOG(ERROR) << "GatherOpenCLKernel only supports a non-const indices Tensor when it is Int32 and fp16 is off.";
     return RET_ERROR;
   }
-
   int input_ndim = in_tensors_.front()->shape().size();
   if (input_ndim < 0 || input_ndim > 4) {
     MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D.";
@@ -59,7 +58,7 @@ int GatherOpenCLKernel::CheckSpecs() {
   TypeId data_type = in_tensors_.at(1)->data_type();
   if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 &&
       data_type != kNumberTypeFloat16) {
-    MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor.";
+    MS_LOG(ERROR) << "GatherOpenCLKernel only supports Int32/Int64/Float32/Float16 indices Tensor.";
     return RET_ERROR;
   }
 
@@ -107,17 +106,51 @@ int GatherOpenCLKernel::Prepare() {
   ocl_runtime_->LoadSource(program_name, gather_source);
   ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
 #endif
-
-  int ret = InitWeights();
-  if (ret != RET_OK) {
-    return ret;
+  if (!in_tensors_.at(1)->IsConst()) {
+    intensor1_is_tensor = true;
   }
+
+  if (!intensor1_is_tensor) {
+    int ret = InitWeights();
+    if (ret != RET_OK) {
+      return ret;
+    }
+  }
+
   SetGlobalLocal();
   SetConstArgs();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
+// Stages the runtime (non-const) int32 indices tensor into a device buffer; RET_ERROR on bad dtype or failed alloc.
+int GatherOpenCLKernel::ConvertTensorToweight() {
+  auto indices_tensor = in_tensors_.at(1);
+  if (indices_tensor->data_type() != kNumberTypeInt32) {  // validate first so the error path owns no buffer
+    MS_LOG(ERROR) << "Gather Only supported The DataType Of Intensor1 is Int32  "
+                  << " But Your Type is :" << indices_tensor->data_type();
+    return RET_ERROR;
+  }
+  auto allocator = ocl_runtime_->GetAllocator();
+  if (indices_data_ != nullptr) {  // Run() calls this on every inference: release the previous buffer first
+    allocator->Free(indices_data_);
+  }
+  auto indices_num = indices_tensor->ElementsNum();
+  indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num));
+  if (indices_data_ == nullptr) {  // must be checked before the pointer is handed to MapBuffer
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return RET_ERROR;
+  }
+  stride_w = GpuTensorInfo(indices_tensor).RowPitch() / sizeof(cl_int);  // source row pitch in cl_int units
+  allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true);
+  auto src = reinterpret_cast<int32_t *>(indices_tensor->data_c());
+  for (int i = 0; i < indices_num; i++) {
+    indices_data_[i] = src[i * stride_w];
+  }
+  allocator->UnmapBuffer(indices_data_);
+  return RET_OK;
+}
+
 int GatherOpenCLKernel::InitWeights() {
   auto indices_tensor = in_tensors_.at(1);
   auto indices_num = indices_tensor->ElementsNum();
@@ -152,6 +185,9 @@ int GatherOpenCLKernel::InitWeights() {
 
 int GatherOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
+  if (intensor1_is_tensor && ConvertTensorToweight() != RET_OK) {
+    return RET_ERROR;  // indices could not be staged: do not launch the kernel on a null or stale buffer
+  }
   ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c());
   ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c());
   ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF);
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
index 6b1524cbdb6..1578e8eba06 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
@@ -39,6 +39,7 @@ class GatherOpenCLKernel : public OpenCLKernel {
   void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
+  int ConvertTensorToweight();  // copies the runtime int32 indices of in_tensors_[1] into indices_data_; used by Run()
 
  protected:
   int UpdateWeights();
@@ -46,6 +47,9 @@ class GatherOpenCLKernel : public OpenCLKernel {
  private:
   int32_t *indices_data_{nullptr};
   int axis_ = {0};
+  bool intensor1_is_tensor{false};  // set in Prepare() when in_tensors_[1] (the indices) is not a constant tensor
+  bool enable_fp16_{false};         // cached from ocl_runtime_->GetFp16Enable() in CheckSpecs()
+  cl_int stride_w{1};               // indices row pitch / sizeof(cl_int); written by ConvertTensorToweight()
 };
 }  // namespace mindspore::kernel
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
index 17c468cb522..363341fea48 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
@@ -34,7 +34,7 @@ namespace mindspore::kernel {
 
 int ToFormatOpenCLKernel::CheckSpecs() {
   auto data_type = in_tensors_.front()->data_type();
-  if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) {
+  if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16 && data_type != kNumberTypeInt32) {
     MS_LOG(ERROR) << "Unsupported data type " << data_type;
     return RET_ERROR;
   }
@@ -61,7 +61,8 @@ void ToFormatOpenCLKernel::SetGlobalLocal() {
 }
 
 int ToFormatOpenCLKernel::Prepare() {
-  std::map<TypeId, std::string> dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}};
+  std::map<TypeId, std::string> dtype_str{  // NOTE(review): Int32 selects the "float" kernels - confirm bits survive
+    {kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}, {kNumberTypeInt32, "float"}};
   std::string kernel_name;
   if (out_mem_type_ == MemType::IMG) {
     kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()];
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc
index 279d7699ba8..68b84096707 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc
@@ -68,7 +68,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
         memcpy(tensor->MutableData(), input_data, tensor->Size());
       }
     } else {
-      EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32);
+      EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32);
       subgraph_inputs.push_back(tensor);
       subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data);
     }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
index 4db30fa9205..ba04555630f 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
@@ -46,6 +46,22 @@ TEST_F(TestOpenCL_Gather, Axis0) {
   }
 }
 
+// Axis-0 gather on a 1-D input where the indices arrive as a runtime (VAR) int32 tensor instead of a constant.
+TEST_F(TestOpenCL_Gather, Axis0_Tensor) {
+  int axis = 0;
+  std::vector<int> input_shape = {10};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {2};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {1, 3};
+  // Single fp32 pass: GatherOpenCLKernel::CheckSpecs() rejects non-const indices when fp16 is enabled.
+  const bool fp16_enable = false;
+  auto *param = CreateParameter(axis);
+  TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+           {output_shape, output_data}, param, fp16_enable, 1e-9);
+}
+
 TEST_F(TestOpenCL_Gather, Axis1) {
   int axis = 1;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -75,6 +91,35 @@ TEST_F(TestOpenCL_Gather, Axis1) {
   }
 }
 
+// Axis-1 gather on a 1x5x4x4 input where the indices arrive as a runtime (VAR) tensor instead of a constant.
+// Rows 1 and 3 along axis 1 are selected, so the expected output is the two 4x4 planes starting at
+// element 16 and element 48 of the input.
+TEST_F(TestOpenCL_Gather, Axis1_intensor1) {
+  int axis = 1;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 2, 4, 4};
+
+  float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+
+  float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+
+  // Runtime indices are exercised as Int32 only: GatherOpenCLKernel::ConvertTensorToweight() rejects every
+  // other dtype, so Int64/Float32/Float16 index fixtures would never be reached here.
+  int32_t indices[] = {1, 3};
+
+  // Single fp32 pass: GatherOpenCLKernel::CheckSpecs() rejects non-const indices when fp16 is enabled.
+  const bool fp16_enable = false;
+  auto *param = CreateParameter(axis);
+  TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+            {indices_shape, indices, VAR, kNumberTypeInt32}},
+           {output_shape, output_data}, param, fp16_enable, 1e-9);
+}
+
 TEST_F(TestOpenCL_Gather, Axis2) {
   int axis = 2;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -96,6 +141,26 @@ TEST_F(TestOpenCL_Gather, Axis2) {
   }
 }
 
+// Axis-2 gather on a 1x5x4x4 input where the indices arrive as a runtime (VAR) int32 tensor instead of a constant.
+TEST_F(TestOpenCL_Gather, Axis2_intensor1) {
+  int axis = 2;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 5, 2, 4};
+  float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {4,  5,  6,  7,  12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39,
+                         44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79};
+  // Single fp32 pass: GatherOpenCLKernel::CheckSpecs() rejects non-const indices when fp16 is enabled.
+  const bool fp16_enable = false;
+  auto *param = CreateParameter(axis);
+  TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+           {output_shape, output_data}, param, fp16_enable);
+}
+
 TEST_F(TestOpenCL_Gather, Axis3) {
   int axis = 3;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -117,4 +182,24 @@ TEST_F(TestOpenCL_Gather, Axis3) {
   }
 }
 
+// Axis-3 gather on a 1x5x4x4 input where the indices arrive as a runtime (VAR) int32 tensor instead of a constant.
+TEST_F(TestOpenCL_Gather, Axis3_intensor1) {
+  int axis = 3;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 5, 4, 2};
+  float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39,
+                         41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79};
+  // Single fp32 pass: GatherOpenCLKernel::CheckSpecs() rejects non-const indices when fp16 is enabled.
+  const bool fp16_enable = false;
+  auto *param = CreateParameter(axis);
+  TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+           {output_shape, output_data}, param, fp16_enable);
+}
+
 }  // namespace mindspore::lite::opencl::test