!45677 [MSLITE]新增端侧opencl算子Crop

Merge pull request !45677 from fatmouse007/for_gpu
This commit is contained in:
i-robot 2022-11-21 07:57:12 +00:00 committed by Gitee
commit ebaa385195
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 301 additions and 1 deletions

View File

@ -0,0 +1,26 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define C4NUM 4
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__kernel void crop(__read_only image2d_t src_data, __write_only image2d_t dst_data,
int4 in_shape, int4 out_shape, int4 offset) {
int out_w = get_global_id(0);
int out_h = get_global_id(1);
int out_batch_idx = out_h / out_shape.y;
int out_height_idx = out_h % out_shape.y;
int in_batch_idx = out_batch_idx + offset.x;
int in_height_idx = out_height_idx + offset.y;
int in_h = in_batch_idx * in_shape.y + in_height_idx;
int out_width_idx = (out_w * C4NUM) / out_shape.w;
int out_channel_idx = (out_w * C4NUM) % out_shape.w;
int in_width_idx = out_width_idx + offset.z;
int in_channel_idx = out_channel_idx + offset.w;
int in_w = in_width_idx * in_shape.w + in_channel_idx;
DTYPE4 res = READ_IMAGE(src_data, smp_zero, (int2)(in_w / C4NUM, in_h));
WRITE_IMAGE(dst_data, (int2)(out_w, out_h), res);
}

View File

@ -0,0 +1,160 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/litert/kernel/opencl/kernel/crop.h"
#include <map>
#include <string>
#include "src/litert/kernel_registry.h"
#include "src/litert/kernel/opencl/utils.h"
#include "src/litert/kernel/opencl/cl/crop.cl.inc"
using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Crop;
namespace mindspore::kernel {
namespace {
const std::map<int, std::string> CROP_SUPPORT_DTYPES = {
{kNumberTypeFloat32, "fp32"},
{kNumberTypeFloat16, "fp16"},
{kNumberTypeInt32, "int32"},
};
}
int CropOpenCLKernel::CheckSpecsWithoutShape() {
auto input_dtype = in_tensors_.front()->data_type();
if (CROP_SUPPORT_DTYPES.find(input_dtype) == CROP_SUPPORT_DTYPES.end()) {
MS_LOG(WARNING) << "input dtype must be float32/float16/int32";
return RET_ERROR;
}
auto output_dtype = out_tensors_.front()->data_type();
if (CROP_SUPPORT_DTYPES.find(output_dtype) == CROP_SUPPORT_DTYPES.end()) {
MS_LOG(WARNING) << "output dtype must be float32/float16/int32";
return RET_ERROR;
}
return RET_OK;
}
int CropOpenCLKernel::CheckSpecs() {
if (in_tensors_.size() != INPUT_TENSOR_SIZE_2 || out_tensors_.size() != OUTPUT_TENSOR_SIZE_1) {
MS_LOG(WARNING) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size();
return RET_ERROR;
}
auto input_tensor = in_tensors_.front();
if ((input_tensor->Channel() % C4NUM) != 0) {
MS_LOG(WARNING) << "input channel must can be divided by 4";
return RET_ERROR;
}
auto output_tensor = out_tensors_.front();
if ((output_tensor->Channel() % C4NUM) != 0) {
MS_LOG(WARNING) << "output channel must can be divided by 4";
return RET_ERROR;
}
return RET_OK;
}
int CropOpenCLKernel::Prepare() {
out_gpu_info_ = GpuTensorInfo(out_tensors_[0]);
const std::string program_name = "crop_program";
const std::string kernel_name = "crop";
auto build_option_ext = CreateBuildOptionsExtByDType(this->registry_data_type());
if (!ocl_runtime_->LoadSource(program_name, crop_source)) {
MS_LOG(ERROR) << "Load source failed.";
return RET_ERROR;
}
auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_option_ext);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Build kernel failed.";
return ret;
}
if (SetConstArgs() != RET_OK) {
MS_LOG(ERROR) << "SeConstArgs failed.";
return RET_ERROR;
}
(void)SetGlobalLocal();
return RET_OK;
}
void CropOpenCLKernel::RightShiftOffsetByAxis() {
bzero(offset_, sizeof(int) * COMM_SHAPE_SIZE);
for (int i = 0; i < crop_param_->offset_size_; i++) {
int index = i + crop_param_->axis_;
if ((index < 0) || (index >= COMM_SHAPE_SIZE)) {
continue;
}
offset_[index] = crop_param_->offset_[i];
}
}
int CropOpenCLKernel::SetConstArgs() {
auto out_tensor = out_tensors_[0];
cl_int4 cl_out_shape = {static_cast<int>(out_tensor->Batch()), static_cast<int>(out_tensor->Height()),
static_cast<int>(out_tensor->Width()), static_cast<int>(out_tensor->Channel())};
auto in_tensor = in_tensors_[0];
cl_int4 cl_in_shape = {static_cast<int>(in_tensor->Batch()), static_cast<int>(in_tensor->Height()),
static_cast<int>(in_tensor->Width()), static_cast<int>(in_tensor->Channel())};
RightShiftOffsetByAxis();
cl_int4 cl_offset = {offset_[0], offset_[1], offset_[2], offset_[3]};
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX2, cl_in_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "Set cl arg: in_shape failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX3, cl_out_shape) != CL_SUCCESS) {
MS_LOG(ERROR) << "Set cl arg: out_shape failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX4, cl_offset) != CL_SUCCESS) {
MS_LOG(ERROR) << "Set cl arg: offset failed.";
return RET_ERROR;
}
return RET_OK;
}
int CropOpenCLKernel::SetGlobalLocal() {
global_size_ = {out_gpu_info_.width, out_gpu_info_.height};
OpenCLKernel::AlignGlobalLocal(global_size_, {});
return RET_OK;
}
int CropOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX0, in_tensors_.front()->data()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX1, out_tensors_.front()->data()) != CL_SUCCESS) {
MS_LOG(ERROR) << "SetKernelArg failed.";
return RET_ERROR;
}
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
MS_LOG(ERROR) << "RunKernel failed.";
return RET_ERROR;
}
return RET_OK;
}
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>);
} // namespace mindspore::kernel

View File

@ -0,0 +1,49 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CROP_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CROP_H_
#include <vector>
#include <string>
#include "src/litert/kernel/opencl/opencl_kernel.h"
#include "nnacl/crop_parameter.h"
namespace mindspore::kernel {
class CropOpenCLKernel : public OpenCLKernel {
public:
using OpenCLKernel::OpenCLKernel;
CropOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: OpenCLKernel(parameter, inputs, outputs, ctx), crop_param_(reinterpret_cast<CropParameter *>(parameter)) {}
~CropOpenCLKernel() override = default;
int Prepare() override;
int CheckSpecsWithoutShape() override;
int CheckSpecs() override;
int SetConstArgs() override;
int SetGlobalLocal() override;
int Run() override;
private:
void RightShiftOffsetByAxis();
CropParameter *crop_param_ = nullptr;
GpuTensorInfo out_gpu_info_ = {};
int offset_[COMM_SHAPE_SIZE] = {0};
};
} // namespace mindspore::kernel
#endif

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -333,6 +333,7 @@ class OpenCLKernel : public LiteKernel {
lite::opencl::OpenCLRuntimeInnerWrapper ocl_runtime_wrap_;
static inline std::map<std::string, BaseTuningParameter> tuned_param_cache_;
};
template <class T>
kernel::LiteKernel *OpenCLKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,

View File

@ -132,6 +132,7 @@ TestOpenCL_Reduce.Sum
TestOpenCL_Reduce.MeanWC
TestOpenCL_Reduce.SumWC
TestOpenCL_Reduce.MeanC
TestOpenCL_Crop.*
MultipleDeviceTest.OldApi1
MultipleDeviceTest.OldApi2
MultipleDeviceTest.NewApi1

View File

@ -0,0 +1,63 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include "ut/src/runtime/kernel/opencl/common.h"
#include "nnacl/crop_parameter.h"
namespace mindspore::lite::opencl::test {
class TestOpenCL_Crop : public CommonTest {};
namespace {
// PrimitiveType_Reshape: src/ops/populate/crop_populate.cc
OpParameter *CreateParameter(int64_t axis, const std::vector<int> &offset) {
auto *param = test::CreateParameter<CropParameter>(schema::PrimitiveType_Crop);
for (size_t i = 0; i < offset.size(); i++) {
param->offset_[i] = offset[i];
}
param->axis_ = axis;
param->offset_size_ = static_cast<int>(offset.size());
return reinterpret_cast<OpParameter *>(param);
}
} // namespace
TEST_F(TestOpenCL_Crop, 4D_4D_Basic) {
std::vector<int> in_shape = {1, 2, 3, 4};
std::vector<int> out_shape = {1, 1, 1, 4};
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
float input_shape_data[4] = {0};
float output_data[] = {4, 5, 6, 7};
int64_t axis = 0;
std::vector<int> param_offset = {0, 0, 1, 0};
for (auto fp16_enable : {false, true}) {
TestMain({{in_shape, input_data, VAR, kNumberTypeFloat32}, {out_shape, input_shape_data, VAR, kNumberTypeFloat32}},
{out_shape, output_data}, CreateParameter(axis, param_offset), fp16_enable, 1e-9, 1e-9, true);
}
}
TEST_F(TestOpenCL_Crop, 4D_4D_AxisOffset) {
std::vector<int> in_shape = {1, 2, 3, 4};
std::vector<int> out_shape = {1, 1, 2, 4};
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
float input_shape_data[8] = {0};
float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23};
int64_t axis = 1;
std::vector<int> param_offset = {1, 1, 0};
for (auto fp16_enable : {false, true}) {
TestMain({{in_shape, input_data, VAR, kNumberTypeFloat32}, {out_shape, input_shape_data, VAR, kNumberTypeFloat32}},
{out_shape, output_data}, CreateParameter(axis, param_offset), fp16_enable, 1e-9, 1e-9, true);
}
}
} // namespace mindspore::lite::opencl::test