!45677 [MSLITE]新增端侧opencl算子Crop
Merge pull request !45677 from fatmouse007/for_gpu
This commit is contained in:
commit
ebaa385195
|
@ -0,0 +1,26 @@
|
|||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
#define C4NUM 4
|
||||
|
||||
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
||||
|
||||
__kernel void crop(__read_only image2d_t src_data, __write_only image2d_t dst_data,
|
||||
int4 in_shape, int4 out_shape, int4 offset) {
|
||||
int out_w = get_global_id(0);
|
||||
int out_h = get_global_id(1);
|
||||
|
||||
int out_batch_idx = out_h / out_shape.y;
|
||||
int out_height_idx = out_h % out_shape.y;
|
||||
int in_batch_idx = out_batch_idx + offset.x;
|
||||
int in_height_idx = out_height_idx + offset.y;
|
||||
int in_h = in_batch_idx * in_shape.y + in_height_idx;
|
||||
|
||||
int out_width_idx = (out_w * C4NUM) / out_shape.w;
|
||||
int out_channel_idx = (out_w * C4NUM) % out_shape.w;
|
||||
int in_width_idx = out_width_idx + offset.z;
|
||||
int in_channel_idx = out_channel_idx + offset.w;
|
||||
int in_w = in_width_idx * in_shape.w + in_channel_idx;
|
||||
|
||||
DTYPE4 res = READ_IMAGE(src_data, smp_zero, (int2)(in_w / C4NUM, in_h));
|
||||
WRITE_IMAGE(dst_data, (int2)(out_w, out_h), res);
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
|
||||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/litert/kernel/opencl/kernel/crop.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "src/litert/kernel_registry.h"
|
||||
#include "src/litert/kernel/opencl/utils.h"
|
||||
#include "src/litert/kernel/opencl/cl/crop.cl.inc"
|
||||
|
||||
using mindspore::kernel::KERNEL_ARCH::kGPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Crop;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace {
|
||||
const std::map<int, std::string> CROP_SUPPORT_DTYPES = {
|
||||
{kNumberTypeFloat32, "fp32"},
|
||||
{kNumberTypeFloat16, "fp16"},
|
||||
{kNumberTypeInt32, "int32"},
|
||||
};
|
||||
}
|
||||
|
||||
int CropOpenCLKernel::CheckSpecsWithoutShape() {
|
||||
auto input_dtype = in_tensors_.front()->data_type();
|
||||
if (CROP_SUPPORT_DTYPES.find(input_dtype) == CROP_SUPPORT_DTYPES.end()) {
|
||||
MS_LOG(WARNING) << "input dtype must be float32/float16/int32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto output_dtype = out_tensors_.front()->data_type();
|
||||
if (CROP_SUPPORT_DTYPES.find(output_dtype) == CROP_SUPPORT_DTYPES.end()) {
|
||||
MS_LOG(WARNING) << "output dtype must be float32/float16/int32";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int CropOpenCLKernel::CheckSpecs() {
|
||||
if (in_tensors_.size() != INPUT_TENSOR_SIZE_2 || out_tensors_.size() != OUTPUT_TENSOR_SIZE_1) {
|
||||
MS_LOG(WARNING) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto input_tensor = in_tensors_.front();
|
||||
if ((input_tensor->Channel() % C4NUM) != 0) {
|
||||
MS_LOG(WARNING) << "input channel must can be divided by 4";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto output_tensor = out_tensors_.front();
|
||||
if ((output_tensor->Channel() % C4NUM) != 0) {
|
||||
MS_LOG(WARNING) << "output channel must can be divided by 4";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int CropOpenCLKernel::Prepare() {
|
||||
out_gpu_info_ = GpuTensorInfo(out_tensors_[0]);
|
||||
|
||||
const std::string program_name = "crop_program";
|
||||
const std::string kernel_name = "crop";
|
||||
auto build_option_ext = CreateBuildOptionsExtByDType(this->registry_data_type());
|
||||
if (!ocl_runtime_->LoadSource(program_name, crop_source)) {
|
||||
MS_LOG(ERROR) << "Load source failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_option_ext);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Build kernel failed.";
|
||||
return ret;
|
||||
}
|
||||
if (SetConstArgs() != RET_OK) {
|
||||
MS_LOG(ERROR) << "SeConstArgs failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
(void)SetGlobalLocal();
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void CropOpenCLKernel::RightShiftOffsetByAxis() {
|
||||
bzero(offset_, sizeof(int) * COMM_SHAPE_SIZE);
|
||||
for (int i = 0; i < crop_param_->offset_size_; i++) {
|
||||
int index = i + crop_param_->axis_;
|
||||
if ((index < 0) || (index >= COMM_SHAPE_SIZE)) {
|
||||
continue;
|
||||
}
|
||||
offset_[index] = crop_param_->offset_[i];
|
||||
}
|
||||
}
|
||||
|
||||
int CropOpenCLKernel::SetConstArgs() {
|
||||
auto out_tensor = out_tensors_[0];
|
||||
cl_int4 cl_out_shape = {static_cast<int>(out_tensor->Batch()), static_cast<int>(out_tensor->Height()),
|
||||
static_cast<int>(out_tensor->Width()), static_cast<int>(out_tensor->Channel())};
|
||||
auto in_tensor = in_tensors_[0];
|
||||
cl_int4 cl_in_shape = {static_cast<int>(in_tensor->Batch()), static_cast<int>(in_tensor->Height()),
|
||||
static_cast<int>(in_tensor->Width()), static_cast<int>(in_tensor->Channel())};
|
||||
RightShiftOffsetByAxis();
|
||||
cl_int4 cl_offset = {offset_[0], offset_[1], offset_[2], offset_[3]};
|
||||
|
||||
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX2, cl_in_shape) != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Set cl arg: in_shape failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX3, cl_out_shape) != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Set cl arg: out_shape failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX4, cl_offset) != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Set cl arg: offset failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int CropOpenCLKernel::SetGlobalLocal() {
|
||||
global_size_ = {out_gpu_info_.width, out_gpu_info_.height};
|
||||
OpenCLKernel::AlignGlobalLocal(global_size_, {});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int CropOpenCLKernel::Run() {
|
||||
MS_LOG(DEBUG) << this->name() << " Running! ";
|
||||
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX0, in_tensors_.front()->data()) != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "SetKernelArg failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (ocl_runtime_->SetKernelArg(kernel_, CLARGSINDEX1, out_tensors_.front()->data()) != CL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "SetKernelArg failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
|
||||
MS_LOG(ERROR) << "RunKernel failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>)
|
||||
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>);
|
||||
REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Crop, OpenCLKernelCreator<CropOpenCLKernel>);
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,49 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CROP_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CROP_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "src/litert/kernel/opencl/opencl_kernel.h"
|
||||
#include "nnacl/crop_parameter.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class CropOpenCLKernel : public OpenCLKernel {
|
||||
public:
|
||||
using OpenCLKernel::OpenCLKernel;
|
||||
CropOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
|
||||
: OpenCLKernel(parameter, inputs, outputs, ctx), crop_param_(reinterpret_cast<CropParameter *>(parameter)) {}
|
||||
~CropOpenCLKernel() override = default;
|
||||
|
||||
int Prepare() override;
|
||||
int CheckSpecsWithoutShape() override;
|
||||
int CheckSpecs() override;
|
||||
int SetConstArgs() override;
|
||||
int SetGlobalLocal() override;
|
||||
int Run() override;
|
||||
|
||||
private:
|
||||
void RightShiftOffsetByAxis();
|
||||
|
||||
CropParameter *crop_param_ = nullptr;
|
||||
GpuTensorInfo out_gpu_info_ = {};
|
||||
int offset_[COMM_SHAPE_SIZE] = {0};
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -333,6 +333,7 @@ class OpenCLKernel : public LiteKernel {
|
|||
lite::opencl::OpenCLRuntimeInnerWrapper ocl_runtime_wrap_;
|
||||
static inline std::map<std::string, BaseTuningParameter> tuned_param_cache_;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
kernel::LiteKernel *OpenCLKernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
|
||||
|
|
|
@ -132,6 +132,7 @@ TestOpenCL_Reduce.Sum
|
|||
TestOpenCL_Reduce.MeanWC
|
||||
TestOpenCL_Reduce.SumWC
|
||||
TestOpenCL_Reduce.MeanC
|
||||
TestOpenCL_Crop.*
|
||||
MultipleDeviceTest.OldApi1
|
||||
MultipleDeviceTest.OldApi2
|
||||
MultipleDeviceTest.NewApi1
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <vector>
|
||||
#include "ut/src/runtime/kernel/opencl/common.h"
|
||||
#include "nnacl/crop_parameter.h"
|
||||
|
||||
namespace mindspore::lite::opencl::test {
|
||||
class TestOpenCL_Crop : public CommonTest {};
|
||||
|
||||
namespace {
|
||||
// PrimitiveType_Reshape: src/ops/populate/crop_populate.cc
|
||||
OpParameter *CreateParameter(int64_t axis, const std::vector<int> &offset) {
|
||||
auto *param = test::CreateParameter<CropParameter>(schema::PrimitiveType_Crop);
|
||||
for (size_t i = 0; i < offset.size(); i++) {
|
||||
param->offset_[i] = offset[i];
|
||||
}
|
||||
param->axis_ = axis;
|
||||
param->offset_size_ = static_cast<int>(offset.size());
|
||||
return reinterpret_cast<OpParameter *>(param);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_F(TestOpenCL_Crop, 4D_4D_Basic) {
|
||||
std::vector<int> in_shape = {1, 2, 3, 4};
|
||||
std::vector<int> out_shape = {1, 1, 1, 4};
|
||||
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
|
||||
float input_shape_data[4] = {0};
|
||||
float output_data[] = {4, 5, 6, 7};
|
||||
int64_t axis = 0;
|
||||
std::vector<int> param_offset = {0, 0, 1, 0};
|
||||
for (auto fp16_enable : {false, true}) {
|
||||
TestMain({{in_shape, input_data, VAR, kNumberTypeFloat32}, {out_shape, input_shape_data, VAR, kNumberTypeFloat32}},
|
||||
{out_shape, output_data}, CreateParameter(axis, param_offset), fp16_enable, 1e-9, 1e-9, true);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TestOpenCL_Crop, 4D_4D_AxisOffset) {
|
||||
std::vector<int> in_shape = {1, 2, 3, 4};
|
||||
std::vector<int> out_shape = {1, 1, 2, 4};
|
||||
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
|
||||
float input_shape_data[8] = {0};
|
||||
float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23};
|
||||
int64_t axis = 1;
|
||||
std::vector<int> param_offset = {1, 1, 0};
|
||||
for (auto fp16_enable : {false, true}) {
|
||||
TestMain({{in_shape, input_data, VAR, kNumberTypeFloat32}, {out_shape, input_shape_data, VAR, kNumberTypeFloat32}},
|
||||
{out_shape, output_data}, CreateParameter(axis, param_offset), fp16_enable, 1e-9, 1e-9, true);
|
||||
}
|
||||
}
|
||||
} // namespace mindspore::lite::opencl::test
|
Loading…
Reference in New Issue