conv2d transpose support fp16

This commit is contained in:
chenzupeng 2020-08-25 10:46:10 +08:00
parent 5b722a1037
commit d950386eb0
7 changed files with 127 additions and 54 deletions

View File

@ -1,3 +1,4 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__kernel void conv2d_transpose2x2(__read_only image2d_t src_data, __global FLT16 *weight, __read_only image2d_t biases, __kernel void conv2d_transpose2x2(__read_only image2d_t src_data, __global FLT16 *weight, __read_only image2d_t biases,
__write_only image2d_t dst_data, int2 kernel_size, int2 stride, int2 padding, __write_only image2d_t dst_data, int2 kernel_size, int2 stride, int2 padding,

View File

@ -1,3 +1,4 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__kernel void to_format_NCHW_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size, __kernel void to_format_NCHW_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
int4 shape) { int4 shape) {

View File

@ -16,6 +16,7 @@
#include <string> #include <string>
#include <set> #include <set>
#include "src/common/utils.h"
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h" #include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h" #include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h"
@ -41,6 +42,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
} }
std::string kernel_name = "conv2d_transpose2x2"; std::string kernel_name = "conv2d_transpose2x2";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
#ifdef PROGRAM_WITH_IL #ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
#else #else
@ -70,13 +72,18 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
int div_ci = UP_DIV(ci, C4NUM); int div_ci = UP_DIV(ci, C4NUM);
int div_co = UP_DIV(co, C4NUM); int div_co = UP_DIV(co, C4NUM);
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto data_size = enable_fp16_ ? sizeof(float16_t) : sizeof(float);
using FLT = float;
if (enable_fp16_) {
using FLT = float16_t;
}
// IHWO to OHWI4(I)4(O)(converter format is IHWO) // IHWO to OHWI4(I)4(O)(converter format is IHWO)
// init padWeight_(buffer mem) // init padWeight_(buffer mem)
padWeight_ = padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size);
reinterpret_cast<FLOAT_t *>(allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * sizeof(FLOAT_t))); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
padWeight_ = reinterpret_cast<FLOAT_t *>(allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true)); auto origin_weight = in_tensors_.at(kWeightIndex)->Data();
auto origin_weight = reinterpret_cast<FLOAT_t *>(in_tensors_.at(kWeightIndex)->Data()); auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type();
int index = 0; int index = 0;
for (int co_i = 0; co_i < div_co; co_i++) { for (int co_i = 0; co_i < div_co; co_i++) {
for (int kh_i = 0; kh_i < kh; kh_i++) { for (int kh_i = 0; kh_i < kh; kh_i++) {
@ -88,9 +95,19 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
int ci_offset = ci_i * C4NUM + ci4_i; int ci_offset = ci_i * C4NUM + ci4_i;
if (co_offset < co && ci_offset < ci) { if (co_offset < co && ci_offset < ci) {
int ori_index = ((ci_offset * kh + kh_i) * kw + kw_i) * ci + co_offset; int ori_index = ((ci_offset * kh + kh_i) * kw + kw_i) * ci + co_offset;
padWeight_[index++] = origin_weight[ori_index]; if (enable_fp16_) {
if (weight_dtype == kNumberTypeFloat32) {
reinterpret_cast<float16_t *>(padWeight_)[index++] =
lite::Float32ToShort(reinterpret_cast<float *>(origin_weight)[ori_index]);
} else {
reinterpret_cast<float16_t *>(padWeight_)[index++] =
reinterpret_cast<float16_t *>(origin_weight)[ori_index];
}
} else {
reinterpret_cast<float *>(padWeight_)[index++] = reinterpret_cast<float *>(origin_weight)[ori_index];
}
} else { } else {
padWeight_[index++] = 0.; reinterpret_cast<FLT *>(padWeight_)[index++] = 0.;
} }
} }
} }
@ -104,17 +121,24 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
size_t im_dst_x, im_dst_y; size_t im_dst_x, im_dst_y;
im_dst_x = div_co; im_dst_x = div_co;
im_dst_y = 1; im_dst_y = 1;
#ifdef ENABLE_FP16
size_t img_dtype = CL_HALF_FLOAT;
#else
size_t img_dtype = CL_FLOAT; size_t img_dtype = CL_FLOAT;
#endif if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
std::vector<size_t> img_size{im_dst_x, im_dst_y, img_dtype}; std::vector<size_t> img_size{im_dst_x, im_dst_y, img_dtype};
bias_ = reinterpret_cast<FLOAT_t *>(allocator->Malloc(im_dst_x * im_dst_y * C4NUM * sizeof(FLOAT_t), img_size)); bias_ = allocator->Malloc(im_dst_x * im_dst_y * C4NUM * data_size, img_size);
bias_ = reinterpret_cast<FLOAT_t *>(allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true)); bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
memset(bias_, 0x00, div_co * C4NUM * sizeof(FLOAT_t)); memset(bias_, 0x00, div_co * C4NUM * sizeof(data_size));
auto bias_dtype = in_tensors_[2]->data_type();
if (in_tensors_.size() >= 3) { if (in_tensors_.size() >= 3) {
memcpy(bias_, in_tensors_[2]->Data(), co * sizeof(FLOAT_t)); if (bias_dtype == kNumberTypeFloat32 && enable_fp16_) {
auto fdata = reinterpret_cast<float *>(in_tensors_[2]->Data());
for (int i = 0; i < co; i++) {
reinterpret_cast<float16_t *>(bias_)[i] = lite::Float32ToShort(fdata[i]);
}
} else {
memcpy(bias_, in_tensors_[2]->Data(), co * data_size);
}
} }
allocator->UnmapBuffer(bias_); allocator->UnmapBuffer(bias_);
} }
@ -123,11 +147,10 @@ int Conv2dTransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *i
size_t im_dst_x, im_dst_y; size_t im_dst_x, im_dst_y;
im_dst_x = UP_DIV(out_tensors_[0]->Channel() * out_tensors_[0]->Width(), C4NUM); im_dst_x = UP_DIV(out_tensors_[0]->Channel() * out_tensors_[0]->Width(), C4NUM);
im_dst_y = out_tensors_[0]->Height(); im_dst_y = out_tensors_[0]->Height();
#ifdef ENABLE_FP16
size_t img_dtype = CL_HALF_FLOAT;
#else
size_t img_dtype = CL_FLOAT; size_t img_dtype = CL_FLOAT;
#endif if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear(); img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec; *img_size = vec;
@ -197,4 +220,5 @@ kernel::LiteKernel *OpenCLConv2dTransposeKernelCreator(const std::vector<lite::t
} }
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DeConv2D, OpenCLConv2dTransposeKernelCreator) REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DeConv2D, OpenCLConv2dTransposeKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_DeConv2D, OpenCLConv2dTransposeKernelCreator)
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -42,8 +42,9 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel {
private: private:
ConvParameter *parameter_; ConvParameter *parameter_;
cl::Kernel kernel_; cl::Kernel kernel_;
FLOAT_t *padWeight_; void *padWeight_;
FLOAT_t *bias_; void *bias_;
bool enable_fp16_{false};
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

View File

@ -128,11 +128,11 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
MS_LOG(ERROR) << "Unsupported format. " << out_tensors_[0]->GetFormat(); MS_LOG(ERROR) << "Unsupported format. " << out_tensors_[0]->GetFormat();
} }
img_size->clear(); img_size->clear();
#ifdef ENABLE_FP16 auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable();
size_t img_dtype = CL_HALF_FLOAT;
#else
size_t img_dtype = CL_FLOAT; size_t img_dtype = CL_FLOAT;
#endif if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec; *img_size = vec;
return RET_OK; return RET_OK;

View File

@ -50,10 +50,12 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
auto svm_capabilities = ocl_runtime->GetSVMCapabilities(); auto svm_capabilities = ocl_runtime->GetSVMCapabilities();
size_t img_pitch = 0; size_t img_pitch = 0;
size_t dtype_size = 1;
if (!img_size.empty()) { if (!img_size.empty()) {
dtype_size = img_size[2] == CL_FLOAT ? sizeof(cl_float4) : sizeof(cl_half4);
uint32_t image_alignment = ocl_runtime->GetImagePitchAlignment(); uint32_t image_alignment = ocl_runtime->GetImagePitchAlignment();
img_pitch = (img_size[0] + image_alignment - 1) / image_alignment * image_alignment; img_pitch = (img_size[0] + image_alignment - 1) / image_alignment * image_alignment;
size = img_pitch * img_size[1] * sizeof(cl_float4); size = img_pitch * img_size[1] * dtype_size;
} }
if (size > MAX_MALLOC_SIZE) { if (size > MAX_MALLOC_SIZE) {
MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; MS_LOG(ERROR) << "MallocData out of max_size, size: " << size;
@ -107,7 +109,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
if (!img_size.empty()) { if (!img_size.empty()) {
cl::ImageFormat image_format(CL_RGBA, img_size[2]); cl::ImageFormat image_format(CL_RGBA, img_size[2]);
cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime->Context(), image_format, *buffer, img_size[0], cl::Image2D *image = new (std::nothrow) cl::Image2D(*ocl_runtime->Context(), image_format, *buffer, img_size[0],
img_size[1], img_pitch * sizeof(cl_float4), &ret); img_size[1], img_pitch * dtype_size, &ret);
if (image == nullptr || ret != CL_SUCCESS) { if (image == nullptr || ret != CL_SUCCESS) {
delete buffer; delete buffer;
UnLock(); UnLock();

View File

@ -29,23 +29,26 @@ class TestConv2dTransposeOpenCL : public mindspore::CommonTest {
TestConv2dTransposeOpenCL() {} TestConv2dTransposeOpenCL() {}
}; };
TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) { void RunTestCase(const std::vector<int> shape, const std::vector<std::string> file_path, bool fp16) {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
if (fp16) {
ocl_runtime->SetFp16Enable(true);
}
ocl_runtime->Init(); ocl_runtime->Init();
auto allocator = ocl_runtime->GetAllocator(); auto allocator = ocl_runtime->GetAllocator();
int pad = 0; int pad = shape[0];
int n = 1; int n = shape[1];
int h = 240; int h = shape[2];
int w = 240; int w = shape[3];
int kh = 2; int kh = shape[4];
int kw = 2; int kw = shape[5];
int ci = 128; int ci = shape[6];
int co = 128; int co = shape[7];
int oh = 2 * h - 1 + 2 * (kh - 1 - pad) - kh + 1; int oh = 2 * h - 1 + 2 * (kh - 1 - pad) - kh + 1;
int ow = 2 * w - 1 + 2 * (kw - 1 - pad) - kw + 1; int ow = 2 * w - 1 + 2 * (kw - 1 - pad) - kw + 1;
size_t input_size; size_t input_size;
std::string input_path = "./test_data/conv2d_transpose/conv2d_transpose_fp32_input.bin"; std::string input_path = file_path[0];
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
if (input_data == nullptr) { if (input_data == nullptr) {
MS_LOG(ERROR) << "input_data load error."; MS_LOG(ERROR) << "input_data load error.";
@ -53,7 +56,7 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
} }
size_t weight_size; size_t weight_size;
std::string weight_path = "./test_data/conv2d_transpose/conv2d_transpose_fp32_weight.bin"; std::string weight_path = file_path[1];
auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size)); auto weight_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
if (weight_data == nullptr) { if (weight_data == nullptr) {
MS_LOG(ERROR) << "weight_data load error."; MS_LOG(ERROR) << "weight_data load error.";
@ -61,14 +64,15 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
} }
size_t bias_size; size_t bias_size;
std::string bias_path = "./test_data/conv2d_transpose/conv2d_transpose_fp32_bias.bin"; std::string bias_path = file_path[2];
auto bias_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(bias_path.c_str(), &bias_size)); auto bias_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(bias_path.c_str(), &bias_size));
if (bias_data == nullptr) { if (bias_data == nullptr) {
MS_LOG(ERROR) << "bias_data load error."; MS_LOG(ERROR) << "bias_data load error.";
return; return;
} }
std::vector<int> input_shape = {n, h, w, ci}; std::vector<int> input_shape = {n, h, w, ci};
auto tensor_x_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), input_shape); auto tensor_x_ptr =
std::make_unique<lite::tensor::Tensor>(TypeId(fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), input_shape);
auto tensor_x = tensor_x_ptr.get(); auto tensor_x = tensor_x_ptr.get();
if (tensor_x == nullptr) { if (tensor_x == nullptr) {
MS_LOG(ERROR) << "tensor_x create error."; MS_LOG(ERROR) << "tensor_x create error.";
@ -76,7 +80,8 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
} }
std::vector<int> weight_shape = {co, kh, kw, ci}; std::vector<int> weight_shape = {co, kh, kw, ci};
auto tensor_w_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), weight_shape); auto tensor_w_ptr =
std::make_unique<lite::tensor::Tensor>(TypeId(fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), weight_shape);
auto tensor_w = tensor_w_ptr.get(); auto tensor_w = tensor_w_ptr.get();
if (tensor_w == nullptr) { if (tensor_w == nullptr) {
MS_LOG(ERROR) << "tensor_w create error."; MS_LOG(ERROR) << "tensor_w create error.";
@ -85,7 +90,8 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
tensor_w->SetData(weight_data); tensor_w->SetData(weight_data);
std::vector<int> bias_shape = {co}; std::vector<int> bias_shape = {co};
auto tensor_bias_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), bias_shape); auto tensor_bias_ptr =
std::make_unique<lite::tensor::Tensor>(TypeId(fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), bias_shape);
auto tensor_bias = tensor_bias_ptr.get(); auto tensor_bias = tensor_bias_ptr.get();
if (tensor_bias == nullptr) { if (tensor_bias == nullptr) {
MS_LOG(ERROR) << "tensor_bias create error."; MS_LOG(ERROR) << "tensor_bias create error.";
@ -94,7 +100,8 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
tensor_bias->SetData(bias_data); tensor_bias->SetData(bias_data);
std::vector<int> out_shape = {1, oh, ow, co}; std::vector<int> out_shape = {1, oh, ow, co};
auto tensor_out_ptr = std::make_unique<lite::tensor::Tensor>(TypeId(kNumberTypeFloat32), out_shape); auto tensor_out_ptr =
std::make_unique<lite::tensor::Tensor>(TypeId(fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), out_shape);
auto tensor_out = tensor_out_ptr.get(); auto tensor_out = tensor_out_ptr.get();
if (tensor_out == nullptr) { if (tensor_out == nullptr) {
MS_LOG(ERROR) << "tensor_out create error."; MS_LOG(ERROR) << "tensor_out create error.";
@ -116,17 +123,18 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
opParameter->pad_w_ = pad; opParameter->pad_w_ = pad;
opParameter->input_channel_ = ci; opParameter->input_channel_ = ci;
opParameter->output_channel_ = co; opParameter->output_channel_ = co;
auto arith_kernel_ptr = std::make_unique<kernel::Conv2dTransposeOpenCLKernel>( auto op_kernel_ptr = std::make_unique<kernel::Conv2dTransposeOpenCLKernel>(
reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
auto arith_kernel = arith_kernel_ptr.get(); auto op_kernel = op_kernel_ptr.get();
if (arith_kernel == nullptr) { if (op_kernel == nullptr) {
MS_LOG(ERROR) << "arith_kernel create error."; MS_LOG(ERROR) << "op_kernel create error.";
return; return;
} }
arith_kernel->Init(); op_kernel->set_name("DeConv");
op_kernel->Init();
inputs[0]->MallocData(allocator); inputs[0]->MallocData(allocator);
std::vector<kernel::LiteKernel *> kernels{arith_kernel}; std::vector<kernel::LiteKernel *> kernels{op_kernel};
std::vector<lite::tensor::Tensor *> inputs_g{tensor_x}; std::vector<lite::tensor::Tensor *> inputs_g{tensor_x};
auto pGraph_ptr = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs_g, outputs, kernels, kernels, kernels); auto pGraph_ptr = std::make_unique<kernel::SubGraphOpenCLKernel>(inputs_g, outputs, kernels, kernels, kernels);
auto pGraph = pGraph_ptr.get(); auto pGraph = pGraph_ptr.get();
@ -138,13 +146,16 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
pGraph->Init(); pGraph->Init();
memcpy(inputs[0]->Data(), input_data, input_size); memcpy(inputs[0]->Data(), input_data, input_size);
pGraph->Run(); pGraph->Run();
using FLT = float;
if (fp16) {
using FLT = float16_t;
}
std::cout << "==================output data=================" << std::endl; std::cout << "==================output data=================" << std::endl;
float *output_data = reinterpret_cast<float *>(tensor_out->Data()); FLT *output_data = reinterpret_cast<FLT *>(tensor_out->Data());
std::cout << std::endl; std::cout << std::endl;
size_t output_size; size_t output_size;
std::string output_path = "./test_data/conv2d_transpose/conv2d_transpose_fp32_output.bin"; std::string output_path = file_path[3];
auto correct_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); auto correct_data = reinterpret_cast<FLT *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
if (correct_data == nullptr) { if (correct_data == nullptr) {
MS_LOG(ERROR) << "correct_data create error."; MS_LOG(ERROR) << "correct_data create error.";
return; return;
@ -152,7 +163,7 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
int size_n = oh * ow * co; int size_n = oh * ow * co;
size_n = size_n > 100 ? 100 : size_n; size_n = size_n > 100 ? 100 : size_n;
for (int i = 0; i < size_n; i++) { for (int i = 0; i < size_n; i++) {
std::cout << output_data[i] << ", "; std::cout << output_data[i] << ", " << correct_data[i] << " ";
if ((i + 1) % co == 0) { if ((i + 1) % co == 0) {
std::cout << std::endl; std::cout << std::endl;
} }
@ -160,10 +171,43 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
std::cout << std::endl; std::cout << std::endl;
// compare // compare
CompareOutputData(output_data, correct_data, oh * ow * co, 0.00001); CommonTest::CompareOutputData(output_data, correct_data, oh * ow * co, 0.00001);
inputs[0]->SetData(nullptr); inputs[0]->SetData(nullptr);
outputs[0]->SetData(nullptr); outputs[0]->SetData(nullptr);
MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed"; MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed";
lite::opencl::OpenCLRuntime::DeleteInstance(); lite::opencl::OpenCLRuntime::DeleteInstance();
} }
TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
int pad = 0;
int n = 1;
int h = 240;
int w = 240;
int kh = 2;
int kw = 2;
int ci = 128;
int co = 128;
std::vector<int> shape = {pad, n, h, w, kh, kw, ci, co};
std::vector<std::string> file_path = {"./test_data/conv2d_transpose/conv2d_transpose_fp32_input.bin",
"./test_data/conv2d_transpose/conv2d_transpose_fp32_weight.bin",
"./test_data/conv2d_transpose/conv2d_transpose_fp32_bias.bin",
"./test_data/conv2d_transpose/conv2d_transpose_fp32_output.bin"};
RunTestCase(shape, file_path, false);
}
TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp16) {
int pad = 0;
int n = 1;
int h = 240;
int w = 240;
int kh = 2;
int kw = 2;
int ci = 128;
int co = 128;
std::vector<int> shape = {pad, n, h, w, kh, kw, ci, co};
std::vector<std::string> file_path = {"./test_data/conv2d_transpose/conv2d_transpose_fp16_input.bin",
"./test_data/conv2d_transpose/conv2d_transpose_fp16_weight.bin",
"./test_data/conv2d_transpose/conv2d_transpose_fp16_bias.bin",
"./test_data/conv2d_transpose/conv2d_transpose_fp16_output.bin"};
RunTestCase(shape, file_path, true);
}
} // namespace mindspore } // namespace mindspore