!45677 [MSLITE]新增端侧opencl算子Crop

Merge pull request !45677 from fatmouse007/for_gpu
2022-11-21 07:57:12 +00:00 · 2022-11-21 07:57:12 +00:00 · ebaa385195
parent 9d2282c9c2 ab93e4b6ac
commit ebaa385195
6 changed files with 301 additions and 1 deletions
--- a/mindspore/lite/src/litert/kernel/opencl/cl/crop.cl
+++ b/mindspore/lite/src/litert/kernel/opencl/cl/crop.cl
@ -0,0 +1,26 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define C4NUM 4
+
+__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+
+__kernel void crop(__read_only image2d_t src_data, __write_only image2d_t dst_data,
+                   int4 in_shape, int4 out_shape, int4 offset) {
+  int out_w = get_global_id(0);
+  int out_h = get_global_id(1);
+
+  int out_batch_idx = out_h / out_shape.y;
+  int out_height_idx = out_h % out_shape.y;
+  int in_batch_idx = out_batch_idx + offset.x;
+  int in_height_idx = out_height_idx + offset.y;
+  int in_h = in_batch_idx * in_shape.y + in_height_idx;
+
+  int out_width_idx = (out_w * C4NUM) / out_shape.w;
+  int out_channel_idx = (out_w * C4NUM) % out_shape.w;
+  int in_width_idx = out_width_idx + offset.z;
+  int in_channel_idx = out_channel_idx + offset.w;
+  int in_w = in_width_idx * in_shape.w + in_channel_idx;
+
+  DTYPE4 res = READ_IMAGE(src_data, smp_zero, (int2)(in_w / C4NUM, in_h));
+  WRITE_IMAGE(dst_data, (int2)(out_w, out_h), res);
+}
--- a/mindspore/lite/src/litert/kernel/opencl/kernel/crop.cc
+++ b/mindspore/lite/src/litert/kernel/opencl/kernel/crop.cc
@ -0,0 +1,160 @@
+
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/litert/kernel/opencl/kernel/crop.h"
+#include <map>
+#include <string>
+#include "src/litert/kernel_registry.h"
+#include "src/litert/kernel/opencl/utils.h"
+#include "src/litert/kernel/opencl/cl/crop.cl.inc"
+
+using mindspore::kernel::KERNEL_ARCH::kGPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Crop;
+
+namespace mindspore::kernel {
+namespace {
+const std::map<int, std::string> CROP_SUPPORT_DTYPES = {
+  {kNumberTypeFloat32, "fp32"},
+  {kNumberTypeFloat16, "fp16"},
+  {kNumberTypeInt32, "int32"},
+};
+}
+
+int CropOpenCLKernel::CheckSpecsWithoutShape() {
+  auto input_dtype = in_tensors_.front()->data_type();
+  if (CROP_SUPPORT_DTYPES.find(input_dtype) == CROP_SUPPORT_DTYPES.end()) {
+    MS_LOG(WARNING) << "input dtype must be float32/float16/int32";
+    return RET_ERROR;
+  }
+
+  auto output_dtype = out_tensors_.front()->data_type();
+  if (CROP_SUPPORT_DTYPES.find(output_dtype) == CROP_SUPPORT_DTYPES.end()) {
+    MS_LOG(WARNING) << "output dtype must be float32/float16/int32";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int CropOpenCLKernel::CheckSpecs() {
+  if (in_tensors_.size() != INPUT_TENSOR_SIZE_2 || out_tensors_.size() != OUTPUT_TENSOR_SIZE_1) {
+    MS_LOG(WARNING) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size();
+    return RET_ERROR;
+  }
+
+  auto input_tensor = in_tensors_.front();
+  if ((input_tensor->Channel() % C4NUM) != 0) {
+    MS_LOG(WARNING) << "input channel must can be divided by 4";
+    return RET_ERROR;
+  }
+
+  auto output_tensor = out_tensors_.front();
+  if ((output_tensor->Channel() % C4NUM) != 0) {
+    MS_LOG(WARNING) << "output channel must can be divided by 4";
+    return RET_ERROR;
+  }
+
+  return RET_OK;
+}
+
+int CropOpenCLKernel::Prepare() {
+  out_gpu_info_ = GpuTensorInfo(out_tensors_[0]);
+
+  const std::string program_name = "crop_program";
+  const std::string kernel_name = "crop";
+  auto build_option_ext = CreateBuildOptionsExtByDType(this->registry_data_type());
+  if (!ocl_runtime_->LoadSource(program_name, crop_source)) {
+    MS_LOG(ERROR) << "Load source failed.";
+    return RET_ERROR;
+  }
+  auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_option_ext);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Build kernel failed.";
+    return ret;
+  }
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
+  (void)SetGlobalLocal();
+  return RET_OK;
+}
+
+void CropOpenCLKernel::RightShiftOffsetByAxis() {
+  bzero(offset_, sizeof(int) * COMM_SHAPE_SIZE);
+  for (int i = 0; i < crop_param_->offset_size_; i++) {
+    int index = i + crop_param_->axis_;
+    if ((index < 0) || (index >= COMM_SHAPE_SIZE)) {
+      continue;
+    }
+    offset_[index] = crop_param_->offset_[i];
+  }
+}
+
+int CropOpenCLKernel::SetConstArgs() {
+  auto out_tensor = out_tensors_[0];
+  cl_int4 cl_out_shape = {static_cast<int>(out_tensor->Batch()), static_cast<int>(out_tensor->Height()),
+                          static_cast<int>(out_tensor->Width()), static_cast<int>(out_tensor->Channel())};
+  auto in_tensor = in_tensors_[0];
+  cl_int4 cl_in_shape = {static_cast<int>(in_tensor->Batch()), static_cast<int>(in_tensor->Height()),
+                         static_cast<int>(in_tensor->Width()), static_cast<int>(in_tensor->Channel())};
+  RightShiftOffsetByAxis();
+  cl_int4 cl_offset = {offset_[0], offset_[1], offset_[2], offset_[3]};
+
+  if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX2, cl_in_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Set cl arg: in_shape failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX3, cl_out_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Set cl arg: out_shape failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX4, cl_offset) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "Set cl arg: offset failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int CropOpenCLKernel::SetGlobalLocal() {
+  global_size_ = {out_gpu_info_.width, out_gpu_info_.height};
+  OpenCLKernel::AlignGlobalLocal(global_size_, {});
+  return RET_OK;
+}
+
+int CropOpenCLKernel::Run() {
+  MS_LOG(DEBUG) << this->name() << " Running! ";
+  if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX0, in_tensors_.front()->data()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX1, out_tensors_.front()->data()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>);
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>);
+}  // namespace mindspore::kernel
--- a/mindspore/lite/src/litert/kernel/opencl/kernel/crop.h
+++ b/mindspore/lite/src/litert/kernel/opencl/kernel/crop.h
@ -0,0 +1,49 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CROP_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CROP_H_
+
+#include <vector>
+#include <string>
+#include "src/litert/kernel/opencl/opencl_kernel.h"
+#include "nnacl/crop_parameter.h"
+
+namespace mindspore::kernel {
+class CropOpenCLKernel : public OpenCLKernel {
+ public:
+  using OpenCLKernel::OpenCLKernel;
+  CropOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
+      : OpenCLKernel(parameter, inputs, outputs, ctx), crop_param_(reinterpret_cast<CropParameter *>(parameter)) {}
+  ~CropOpenCLKernel() override = default;
+
+  int Prepare() override;
+  int CheckSpecsWithoutShape() override;
+  int CheckSpecs() override;
+  int SetConstArgs() override;
+  int SetGlobalLocal() override;
+  int Run() override;
+
+ private:
+  void RightShiftOffsetByAxis();
+
+  CropParameter *crop_param_ = nullptr;
+  GpuTensorInfo out_gpu_info_ = {};
+  int offset_[COMM_SHAPE_SIZE] = {0};
+};
+}  // namespace mindspore::kernel
+#endif
--- a/mindspore/lite/src/litert/kernel/opencl/opencl_kernel.h
+++ b/mindspore/lite/src/litert/kernel/opencl/opencl_kernel.h
@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -333,6 +333,7 @@ class OpenCLKernel : public LiteKernel {
  lite::opencl::OpenCLRuntimeInnerWrapper ocl_runtime_wrap_;
  static inline std::map<std::string, BaseTuningParameter> tuned_param_cache_;
 };
+
 template <class T>
 kernel::LiteKernel *OpenCLKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
--- a/mindspore/lite/test/config_level0/ut_arm64.cfg
+++ b/mindspore/lite/test/config_level0/ut_arm64.cfg
@ -132,6 +132,7 @@ TestOpenCL_Reduce.Sum
 TestOpenCL_Reduce.MeanWC
 TestOpenCL_Reduce.SumWC
 TestOpenCL_Reduce.MeanC
+TestOpenCL_Crop.*
 MultipleDeviceTest.OldApi1
 MultipleDeviceTest.OldApi2
 MultipleDeviceTest.NewApi1
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/crop_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/crop_tests.cc
@ -0,0 +1,63 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vector>
+#include "ut/src/runtime/kernel/opencl/common.h"
+#include "nnacl/crop_parameter.h"
+
+namespace mindspore::lite::opencl::test {
+class TestOpenCL_Crop : public CommonTest {};
+
+namespace {
+// PrimitiveType_Reshape: src/ops/populate/crop_populate.cc
+OpParameter *CreateParameter(int64_t axis, const std::vector<int> &offset) {
+  auto *param = test::CreateParameter<CropParameter>(schema::PrimitiveType_Crop);
+  for (size_t i = 0; i < offset.size(); i++) {
+    param->offset_[i] = offset[i];
+  }
+  param->axis_ = axis;
+  param->offset_size_ = static_cast<int>(offset.size());
+  return reinterpret_cast<OpParameter *>(param);
+}
+}  // namespace
+
+TEST_F(TestOpenCL_Crop, 4D_4D_Basic) {
+  std::vector<int> in_shape = {1, 2, 3, 4};
+  std::vector<int> out_shape = {1, 1, 1, 4};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
+  float input_shape_data[4] = {0};
+  float output_data[] = {4, 5, 6, 7};
+  int64_t axis = 0;
+  std::vector<int> param_offset = {0, 0, 1, 0};
+  for (auto fp16_enable : {false, true}) {
+    TestMain({{in_shape, input_data, VAR, kNumberTypeFloat32}, {out_shape, input_shape_data, VAR, kNumberTypeFloat32}},
+             {out_shape, output_data}, CreateParameter(axis, param_offset), fp16_enable, 1e-9, 1e-9, true);
+  }
+}
+
+TEST_F(TestOpenCL_Crop, 4D_4D_AxisOffset) {
+  std::vector<int> in_shape = {1, 2, 3, 4};
+  std::vector<int> out_shape = {1, 1, 2, 4};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
+  float input_shape_data[8] = {0};
+  float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23};
+  int64_t axis = 1;
+  std::vector<int> param_offset = {1, 1, 0};
+  for (auto fp16_enable : {false, true}) {
+    TestMain({{in_shape, input_data, VAR, kNumberTypeFloat32}, {out_shape, input_shape_data, VAR, kNumberTypeFloat32}},
+             {out_shape, output_data}, CreateParameter(axis, param_offset), fp16_enable, 1e-9, 1e-9, true);
+  }
+}
+}  // namespace mindspore::lite::opencl::test