forked from mindspore-Ecosystem/mindspore
!5662 add nc4hw4 to opencl depthwise
Merge pull request !5662 from wandongdong/master
This commit is contained in:
commit
e06dc0a946
|
@ -27,6 +27,33 @@ __kernel void to_format_NHWC_to_NHWC4_IMG(__global FLT4 *src_data, __write_only
|
||||||
}
|
}
|
||||||
WRITE_IMAGE(dst_data, (int2)(Y * size.z + Z, X), data);
|
WRITE_IMAGE(dst_data, (int2)(Y * size.z + Z, X), data);
|
||||||
}
|
}
|
||||||
|
__kernel void to_format_NHWC_to_NC4HW4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
|
||||||
|
int4 shape) {
|
||||||
|
int X = get_global_id(0);
|
||||||
|
int Y = get_global_id(1);
|
||||||
|
int Z = get_global_id(2);
|
||||||
|
if (X >= size.x || Y >= size.y || Z >= size.z) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int offset = (X * shape.z + Y) * shape.w + Z * 4;
|
||||||
|
__global FLT *src_addr = (__global FLT *)src_data;
|
||||||
|
src_addr += offset;
|
||||||
|
FLT4 data = (FLT4)(0.f);
|
||||||
|
if ((Z + 1) * 4 <= shape.w) {
|
||||||
|
data = ((__global FLT4 *)src_addr)[0];
|
||||||
|
} else {
|
||||||
|
if ((shape.w - Z * 4) >= 1) {
|
||||||
|
data.x = src_addr[0];
|
||||||
|
}
|
||||||
|
if ((shape.w - Z * 4) >= 2) {
|
||||||
|
data.y = src_addr[1];
|
||||||
|
}
|
||||||
|
if ((shape.w - Z * 4) >= 3) {
|
||||||
|
data.z = src_addr[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WRITE_IMAGE(dst_data, (int2)(Y, Z * size.x + X), data);
|
||||||
|
}
|
||||||
__kernel void to_format_NHWC4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
|
__kernel void to_format_NHWC4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
|
||||||
int4 shape) {
|
int4 shape) {
|
||||||
int X = get_global_id(0);
|
int X = get_global_id(0);
|
||||||
|
@ -84,6 +111,32 @@ __kernel void to_format_NHWC4_to_NHWC_BUF(__read_only image2d_t src_data, __glob
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
__kernel void to_format_NC4HW4_to_NHWC_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
|
||||||
|
int4 shape) {
|
||||||
|
int X = get_global_id(0);
|
||||||
|
int Y = get_global_id(1);
|
||||||
|
int Z = get_global_id(2);
|
||||||
|
if (X >= size.x || Y >= size.y || Z >= size.z) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FLT4 data = READ_IMAGE(src_data, smp_zero, (int2)(Y, Z * size.x + X));
|
||||||
|
int offset = (X * shape.z + Y) * shape.w + Z * 4;
|
||||||
|
__global FLT *dst_addr = (__global FLT *)dst_data;
|
||||||
|
dst_addr += offset;
|
||||||
|
if ((Z + 1) * 4 <= shape.w) {
|
||||||
|
((__global FLT4 *)dst_addr)[0] = data;
|
||||||
|
} else {
|
||||||
|
if (shape.w - Z * 4 >= 1) {
|
||||||
|
dst_addr[0] = data.x;
|
||||||
|
}
|
||||||
|
if (shape.w - Z * 4 >= 2) {
|
||||||
|
dst_addr[1] = data.y;
|
||||||
|
}
|
||||||
|
if (shape.w - Z * 4 >= 3) {
|
||||||
|
dst_addr[2] = data.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
__kernel void to_format_NC4HW4_to_NC4HW4_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
|
__kernel void to_format_NC4HW4_to_NC4HW4_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
|
||||||
int4 shape) {
|
int4 shape) {
|
||||||
// size(h, w, c, 1), shape(n, c, h, w)
|
// size(h, w, c, 1), shape(n, c, h, w)
|
||||||
|
|
|
@ -43,12 +43,9 @@ namespace mindspore::kernel {
|
||||||
int DepthwiseConv2dOpenCLKernel::Init() {
|
int DepthwiseConv2dOpenCLKernel::Init() {
|
||||||
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
|
||||||
std::string kernel_name = "DepthwiseConv2d";
|
std::string kernel_name = "DepthwiseConv2d";
|
||||||
auto in_format = in_tensors_[0]->GetFormat();
|
auto in_format = op_format_;
|
||||||
in_ori_format_ = in_tensors_[0]->GetFormat();
|
in_ori_format_ = in_tensors_[0]->GetFormat();
|
||||||
out_ori_format_ = out_tensors_[0]->GetFormat();
|
out_ori_format_ = out_tensors_[0]->GetFormat();
|
||||||
in_format = (in_format == schema::Format_NHWC)
|
|
||||||
? schema::Format_NHWC4
|
|
||||||
: ((in_format == schema::Format_NCHW) ? schema::Format_NC4HW4 : in_format);
|
|
||||||
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
|
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
|
||||||
MS_LOG(ERROR) << "input format(" << in_format << ") "
|
MS_LOG(ERROR) << "input format(" << in_format << ") "
|
||||||
<< "format not support!";
|
<< "format not support!";
|
||||||
|
|
|
@ -65,13 +65,13 @@ int ToFormatOpenCLKernel::Init() {
|
||||||
int ToFormatOpenCLKernel::InitNHWCShape() {
|
int ToFormatOpenCLKernel::InitNHWCShape() {
|
||||||
std::vector<int> shapex = out_tensors_[0]->shape();
|
std::vector<int> shapex = out_tensors_[0]->shape();
|
||||||
size_t n, h, w, c;
|
size_t n, h, w, c;
|
||||||
if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4 || out_tensors_[0]->GetFormat() == schema::Format_NHWC) {
|
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4 || out_tensors_[0]->GetFormat() == schema::Format_NHWC4 ||
|
||||||
|
out_tensors_[0]->GetFormat() == schema::Format_NHWC) {
|
||||||
n = shapex[0];
|
n = shapex[0];
|
||||||
h = shapex[1];
|
h = shapex[1];
|
||||||
w = shapex[2];
|
w = shapex[2];
|
||||||
c = shapex[3];
|
c = shapex[3];
|
||||||
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4 ||
|
} else if (out_tensors_[0]->GetFormat() == schema::Format_NCHW) {
|
||||||
out_tensors_[0]->GetFormat() == schema::Format_NCHW) {
|
|
||||||
n = shapex[0];
|
n = shapex[0];
|
||||||
h = shapex[2];
|
h = shapex[2];
|
||||||
w = shapex[3];
|
w = shapex[3];
|
||||||
|
@ -105,21 +105,20 @@ int ToFormatOpenCLKernel::GetLocalSize(size_t idx, const std::vector<size_t> &gl
|
||||||
|
|
||||||
int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
|
int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
|
||||||
size_t im_dst_x, im_dst_y;
|
size_t im_dst_x, im_dst_y;
|
||||||
std::vector<int> shapex = out_tensors_[0]->shape();
|
|
||||||
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
|
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) {
|
||||||
int c = shapex[1] * shapex[2];
|
int c = nhwc_shape_[3];
|
||||||
int h = shapex[0];
|
int h = nhwc_shape_[1];
|
||||||
int w = shapex[3];
|
int w = nhwc_shape_[2];
|
||||||
im_dst_y = h * UP_DIV(c, C4NUM);
|
im_dst_y = nhwc_shape_[0] * h * UP_DIV(c, C4NUM);
|
||||||
im_dst_x = w;
|
im_dst_x = w;
|
||||||
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
|
} else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) {
|
||||||
int h = shapex[0] * shapex[1];
|
int h = nhwc_shape_[0] * nhwc_shape_[1];
|
||||||
int w = shapex[2];
|
int w = nhwc_shape_[2];
|
||||||
int c = shapex[3];
|
int c = nhwc_shape_[3];
|
||||||
im_dst_x = w * UP_DIV(c, C4NUM);
|
im_dst_x = w * UP_DIV(c, C4NUM);
|
||||||
im_dst_y = h;
|
im_dst_y = h;
|
||||||
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
|
} else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) {
|
||||||
int c = shapex[1];
|
int c = nhwc_shape_[1];
|
||||||
im_dst_x = UP_DIV(c, C4NUM);
|
im_dst_x = UP_DIV(c, C4NUM);
|
||||||
im_dst_y = 1;
|
im_dst_y = 1;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -58,7 +58,7 @@ class OpenCLKernel : public LiteKernel {
|
||||||
OpenCLMemType out_mem_type_{OpenCLMemType::IMG};
|
OpenCLMemType out_mem_type_{OpenCLMemType::IMG};
|
||||||
schema::Format in_ori_format_{schema::Format_NHWC};
|
schema::Format in_ori_format_{schema::Format_NHWC};
|
||||||
schema::Format out_ori_format_{schema::Format_NHWC4};
|
schema::Format out_ori_format_{schema::Format_NHWC4};
|
||||||
schema::Format op_format_{schema::Format_NC4HW4};
|
schema::Format op_format_{schema::Format_NHWC4};
|
||||||
};
|
};
|
||||||
} // namespace mindspore::kernel
|
} // namespace mindspore::kernel
|
||||||
|
|
||||||
|
|
|
@ -66,12 +66,12 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat
|
||||||
std::vector<int> shape_bias = {conv_param->output_channel_};
|
std::vector<int> shape_bias = {conv_param->output_channel_};
|
||||||
std::vector<int> shape_out;
|
std::vector<int> shape_out;
|
||||||
std::vector<int> shape_in;
|
std::vector<int> shape_in;
|
||||||
if (format == schema::Format_NHWC || format == schema::Format_NHWC4) {
|
if (format == schema::Format_NHWC || format == schema::Format_NHWC4 || format == schema::Format_NC4HW4) {
|
||||||
shape_in = std::vector<int>(
|
shape_in = std::vector<int>(
|
||||||
{conv_param->input_batch_, conv_param->input_h_, conv_param->input_w_, conv_param->input_channel_});
|
{conv_param->input_batch_, conv_param->input_h_, conv_param->input_w_, conv_param->input_channel_});
|
||||||
shape_out = std::vector<int>(
|
shape_out = std::vector<int>(
|
||||||
{conv_param->output_batch_, conv_param->output_h_, conv_param->output_w_, conv_param->output_channel_});
|
{conv_param->output_batch_, conv_param->output_h_, conv_param->output_w_, conv_param->output_channel_});
|
||||||
} else if (format == schema::Format_NCHW || format == schema::Format_NC4HW4) {
|
} else if (format == schema::Format_NCHW) {
|
||||||
shape_in = std::vector<int>(
|
shape_in = std::vector<int>(
|
||||||
{conv_param->input_batch_, conv_param->input_channel_, conv_param->input_h_, conv_param->input_w_});
|
{conv_param->input_batch_, conv_param->input_channel_, conv_param->input_h_, conv_param->input_w_});
|
||||||
shape_out = std::vector<int>(
|
shape_out = std::vector<int>(
|
||||||
|
@ -98,6 +98,7 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat
|
||||||
delete[] packed_input;
|
delete[] packed_input;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
pKernel->SetFormatType(format);
|
||||||
pKernel->Init();
|
pKernel->Init();
|
||||||
|
|
||||||
std::vector<kernel::LiteKernel *> kernels{pKernel.get()};
|
std::vector<kernel::LiteKernel *> kernels{pKernel.get()};
|
||||||
|
|
Loading…
Reference in New Issue