forked from mindspore-Ecosystem/mindspore
!11443 [MS][LITE][GPU]add depthtospace
From: @chenzupeng Reviewed-by: @ddwsky,@zhanghaibo5 Signed-off-by: @ddwsky
This commit is contained in:
commit
b16aab7831
|
@ -56,3 +56,57 @@ __kernel void SpaceToDepthAlign(__read_only image2d_t src_data, __write_only ima
|
||||||
WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z),
|
WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z),
|
||||||
READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci, ni * in_shape.y + hi)));
|
READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci, ni * in_shape.y + hi)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__kernel void DepthToSpace(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 in_shape,
|
||||||
|
int4 out_shape, int block_size, int co_size) {
|
||||||
|
int X = get_global_id(0); // C4
|
||||||
|
int Y = get_global_id(1); // W
|
||||||
|
int Z = get_global_id(2); // H * N
|
||||||
|
if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return;
|
||||||
|
if (out_shape.y == 0 || co_size == 0) return;
|
||||||
|
int N = Z / out_shape.y;
|
||||||
|
int H = Z % out_shape.y;
|
||||||
|
int co_base = X * C4NUM;
|
||||||
|
FLT result[C4NUM] = {0.f};
|
||||||
|
for (int i = 0; i < C4NUM; i++) {
|
||||||
|
int co = co_base + i;
|
||||||
|
int bh = H % block_size;
|
||||||
|
int hi = H / block_size;
|
||||||
|
int bw = Y % block_size;
|
||||||
|
int wi = Y / block_size;
|
||||||
|
int ci = (bh * block_size + bw) * co_size + co;
|
||||||
|
int ci4 = ci / C4NUM;
|
||||||
|
int ci4_ramainder = ci % C4NUM;
|
||||||
|
FLT4 tmp = READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci4, N * in_shape.y + hi));
|
||||||
|
if (ci4_ramainder == 0) {
|
||||||
|
result[i] = tmp.x;
|
||||||
|
} else if (ci4_ramainder == 1) {
|
||||||
|
result[i] = tmp.y;
|
||||||
|
} else if (ci4_ramainder == 2) {
|
||||||
|
result[i] = tmp.z;
|
||||||
|
} else {
|
||||||
|
result[i] = tmp.w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FLT4 result_flt4 = {result[0], result[1], result[2], result[3]};
|
||||||
|
WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result_flt4);
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void DepthToSpaceAlign(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 in_shape,
|
||||||
|
int4 out_shape, int block_size, int co_size) {
|
||||||
|
int X = get_global_id(0); // C4
|
||||||
|
int Y = get_global_id(1); // W
|
||||||
|
int Z = get_global_id(2); // H * N
|
||||||
|
if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return;
|
||||||
|
if (out_shape.y == 0 || block_size == 0) return;
|
||||||
|
int N = Z / out_shape.y;
|
||||||
|
int H = Z % out_shape.y;
|
||||||
|
int ni = N;
|
||||||
|
int bh = H % block_size;
|
||||||
|
int hi = H / block_size;
|
||||||
|
int bw = Y % block_size;
|
||||||
|
int wi = Y / block_size;
|
||||||
|
int ci = (bh * block_size + bw) * out_shape.w + X;
|
||||||
|
WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z),
|
||||||
|
READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci, ni * in_shape.y + hi)));
|
||||||
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@ using mindspore::lite::RET_ERROR;
|
||||||
using mindspore::lite::RET_NULL_PTR;
|
using mindspore::lite::RET_NULL_PTR;
|
||||||
using mindspore::lite::RET_OK;
|
using mindspore::lite::RET_OK;
|
||||||
using mindspore::lite::RET_PARAM_INVALID;
|
using mindspore::lite::RET_PARAM_INVALID;
|
||||||
|
using mindspore::schema::PrimitiveType_DepthToSpace;
|
||||||
using mindspore::schema::PrimitiveType_SpaceToDepth;
|
using mindspore::schema::PrimitiveType_SpaceToDepth;
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
|
@ -43,10 +44,13 @@ int SpaceToDepthOpenCLKernel::Prepare() {
|
||||||
std::string kernel_name;
|
std::string kernel_name;
|
||||||
in_shape_ = GpuTensorInfo(in_tensors_[0]);
|
in_shape_ = GpuTensorInfo(in_tensors_[0]);
|
||||||
out_shape_ = GpuTensorInfo(out_tensors_[0]);
|
out_shape_ = GpuTensorInfo(out_tensors_[0]);
|
||||||
if (in_shape_.C % C4NUM != 0) {
|
if (Type() == PrimitiveType_DepthToSpace) {
|
||||||
kernel_name = "SpaceToDepth";
|
kernel_name = "DepthToSpace";
|
||||||
} else {
|
} else {
|
||||||
kernel_name = "SpaceToDepthAlign";
|
kernel_name = "SpaceToDepth";
|
||||||
|
}
|
||||||
|
if (in_shape_.C % C4NUM == 0 && out_shape_.C % C4NUM == 0) {
|
||||||
|
kernel_name += "Align";
|
||||||
}
|
}
|
||||||
#ifdef PROGRAM_WITH_IL
|
#ifdef PROGRAM_WITH_IL
|
||||||
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
|
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
|
||||||
|
@ -71,8 +75,13 @@ void SpaceToDepthOpenCLKernel::SetConstArgs() {
|
||||||
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape);
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape);
|
||||||
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape);
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape);
|
||||||
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_);
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_);
|
||||||
int ci_size = in_shape_.C;
|
if (Type() == PrimitiveType_DepthToSpace) {
|
||||||
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size);
|
int co_size = out_shape_.C;
|
||||||
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size);
|
||||||
|
} else {
|
||||||
|
int ci_size = in_shape_.C;
|
||||||
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
|
void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
|
||||||
local_size_ = {};
|
local_size_ = {};
|
||||||
|
@ -91,4 +100,6 @@ int SpaceToDepthOpenCLKernel::Run() {
|
||||||
|
|
||||||
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_SpaceToDepth, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
|
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_SpaceToDepth, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
|
||||||
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_SpaceToDepth, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
|
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_SpaceToDepth, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
|
||||||
|
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DepthToSpace, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
|
||||||
|
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_DepthToSpace, OpenCLKernelCreator<SpaceToDepthOpenCLKernel>)
|
||||||
} // namespace mindspore::kernel
|
} // namespace mindspore::kernel
|
||||||
|
|
|
@ -15,10 +15,12 @@
|
||||||
*/
|
*/
|
||||||
#include "ut/src/runtime/kernel/opencl/common.h"
|
#include "ut/src/runtime/kernel/opencl/common.h"
|
||||||
#include "nnacl/fp32/space_to_depth_fp32.h"
|
#include "nnacl/fp32/space_to_depth_fp32.h"
|
||||||
|
#include "nnacl/depth_to_space_parameter.h"
|
||||||
|
|
||||||
namespace mindspore::lite::opencl::test {
|
namespace mindspore::lite::opencl::test {
|
||||||
|
|
||||||
class TestOpenCL_SpaceToDepth : public CommonTest {};
|
class TestOpenCL_SpaceToDepth : public CommonTest {};
|
||||||
|
class TestOpenCL_DepthToSpace : public CommonTest {};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// PrimitiveType_SpaceToDepth: src/ops/populate/space_to_depth_populate.cc
|
// PrimitiveType_SpaceToDepth: src/ops/populate/space_to_depth_populate.cc
|
||||||
|
@ -27,6 +29,12 @@ OpParameter *CreateParameter(int block_size) {
|
||||||
param->block_size_ = block_size;
|
param->block_size_ = block_size;
|
||||||
return reinterpret_cast<OpParameter *>(param);
|
return reinterpret_cast<OpParameter *>(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OpParameter *CreateDepthToSpaceParameter(int block_size) {
|
||||||
|
auto *param = test::CreateParameter<DepthToSpaceParameter>(schema::PrimitiveType_DepthToSpace);
|
||||||
|
param->block_size_ = block_size;
|
||||||
|
return reinterpret_cast<OpParameter *>(param);
|
||||||
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
TEST_F(TestOpenCL_SpaceToDepth, AlignTest1) {
|
TEST_F(TestOpenCL_SpaceToDepth, AlignTest1) {
|
||||||
|
@ -163,4 +171,166 @@ TEST_F(TestOpenCL_SpaceToDepth, NotAlignTest4) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, AlignTest1) {
|
||||||
|
int block_size = 2;
|
||||||
|
std::vector<int> input_shape = {1, 2, 2, 16};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
|
||||||
|
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
|
||||||
|
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
|
||||||
|
float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13,
|
||||||
|
14, 15, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51,
|
||||||
|
52, 53, 54, 55, 40, 41, 42, 43, 44, 45, 46, 47, 56, 57, 58, 59, 60, 61, 62, 63};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, AlignTest2) {
|
||||||
|
int block_size = 2;
|
||||||
|
std::vector<int> input_shape = {1, 1, 2, 16};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||||
|
float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, AlignTest3) {
|
||||||
|
int block_size = 3;
|
||||||
|
std::vector<int> input_shape = {1, 1, 2, 36};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
|
||||||
|
float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 36, 37, 38, 39, 40, 41,
|
||||||
|
42, 43, 44, 45, 46, 47, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 24, 25, 26, 27, 28, 29,
|
||||||
|
30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, AlignTest4) {
|
||||||
|
int block_size = 4;
|
||||||
|
std::vector<int> input_shape = {1, 1, 1, 64};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
|
||||||
|
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
|
||||||
|
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
|
||||||
|
float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
|
||||||
|
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
|
||||||
|
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, NotAlignTest1) {
|
||||||
|
int block_size = 2;
|
||||||
|
std::vector<int> input_shape = {1, 3, 3, 8};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71};
|
||||||
|
float output_data[] = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 4, 5, 6, 7, 12, 13,
|
||||||
|
14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 32, 33, 34, 35, 40, 41, 42, 43,
|
||||||
|
28, 29, 30, 31, 36, 37, 38, 39, 44, 45, 46, 47, 48, 49, 50, 51, 56, 57,
|
||||||
|
58, 59, 64, 65, 66, 67, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, NotAlignTest2) {
|
||||||
|
int block_size = 3;
|
||||||
|
std::vector<int> input_shape = {1, 3, 3, 9};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
||||||
|
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
|
||||||
|
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
|
||||||
|
63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80};
|
||||||
|
float output_data[] = {0, 1, 2, 9, 10, 11, 18, 19, 20, 3, 4, 5, 12, 13, 14, 21, 22, 23, 6, 7, 8,
|
||||||
|
15, 16, 17, 24, 25, 26, 27, 28, 29, 36, 37, 38, 45, 46, 47, 30, 31, 32, 39, 40, 41,
|
||||||
|
48, 49, 50, 33, 34, 35, 42, 43, 44, 51, 52, 53, 54, 55, 56, 63, 64, 65, 72, 73, 74,
|
||||||
|
57, 58, 59, 66, 67, 68, 75, 76, 77, 60, 61, 62, 69, 70, 71, 78, 79, 80};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, NotAlignTest3) {
|
||||||
|
int block_size = 4;
|
||||||
|
std::vector<int> input_shape = {1, 3, 2, 32};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
|
||||||
|
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
|
||||||
|
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
|
||||||
|
66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
|
||||||
|
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
|
||||||
|
110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
|
||||||
|
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
|
||||||
|
154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
|
||||||
|
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191};
|
||||||
|
float output_data[] = {
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8, 9, 10, 11, 12, 13,
|
||||||
|
14, 15, 40, 41, 42, 43, 44, 45, 46, 47, 16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51,
|
||||||
|
52, 53, 54, 55, 24, 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
|
||||||
|
66, 67, 68, 69, 70, 71, 96, 97, 98, 99, 100, 101, 102, 103, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||||
|
104, 105, 106, 107, 108, 109, 110, 111, 80, 81, 82, 83, 84, 85, 86, 87, 112, 113, 114, 115, 116, 117,
|
||||||
|
118, 119, 88, 89, 90, 91, 92, 93, 94, 95, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
|
||||||
|
132, 133, 134, 135, 160, 161, 162, 163, 164, 165, 166, 167, 136, 137, 138, 139, 140, 141, 142, 143, 168, 169,
|
||||||
|
170, 171, 172, 173, 174, 175, 144, 145, 146, 147, 148, 149, 150, 151, 176, 177, 178, 179, 180, 181, 182, 183,
|
||||||
|
152, 153, 154, 155, 156, 157, 158, 159, 184, 185, 186, 187, 188, 189, 190, 191};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestOpenCL_DepthToSpace, NotAlignTest4) {
|
||||||
|
int block_size = 2;
|
||||||
|
std::vector<int> input_shape = {1, 3, 4, 8};
|
||||||
|
std::vector<int> output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size,
|
||||||
|
input_shape[3] / (block_size * block_size)};
|
||||||
|
float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||||
|
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||||
|
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
|
||||||
|
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95};
|
||||||
|
float output_data[] = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, 4, 5, 6, 7,
|
||||||
|
12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 32, 33, 34, 35, 40, 41, 42, 43,
|
||||||
|
48, 49, 50, 51, 56, 57, 58, 59, 36, 37, 38, 39, 44, 45, 46, 47, 52, 53, 54, 55,
|
||||||
|
60, 61, 62, 63, 64, 65, 66, 67, 72, 73, 74, 75, 80, 81, 82, 83, 88, 89, 90, 91,
|
||||||
|
68, 69, 70, 71, 76, 77, 78, 79, 84, 85, 86, 87, 92, 93, 94, 95};
|
||||||
|
|
||||||
|
for (auto fp16_enable : {false, true}) {
|
||||||
|
auto *param = CreateDepthToSpaceParameter(block_size);
|
||||||
|
TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable);
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace mindspore::lite::opencl::test
|
} // namespace mindspore::lite::opencl::test
|
||||||
|
|
Loading…
Reference in New Issue