diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index 3ec34cdecb8..46121dd7039 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -43,20 +43,24 @@ int ConvolutionOpenCLKernel::Init() { std::set build_options; use_fp16_ = ocl_runtime_->GetFp16Enable(); + auto input_tensor = in_tensors_[0]; + auto output_tensor = out_tensors_[0]; + in_ori_format_ = input_tensor->GetFormat(); + out_ori_format_ = output_tensor->GetFormat(); if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; + return RET_ERROR; } - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); + input_tensor->SetFormat(op_format_); + output_tensor->SetFormat(op_format_); - CI_ = in_tensors_[0]->Channel(); - IH_ = in_tensors_[0]->Height(); - IW_ = in_tensors_[0]->Width(); - CO_ = out_tensors_[0]->Channel(); - OH_ = out_tensors_[0]->Height(); - OW_ = out_tensors_[0]->Width(); + batch_size_ = input_tensor->Batch(); + CI_ = input_tensor->Channel(); + IH_ = input_tensor->Height(); + IW_ = input_tensor->Width(); + CO_ = output_tensor->Channel(); + OH_ = output_tensor->Height(); + OW_ = output_tensor->Width(); CI_SLICES_ = UP_DIV(CI_, C4NUM); CO_SLICES_ = UP_DIV(CO_, C4NUM); KH_ = param->kernel_h_; @@ -68,46 +72,11 @@ int ConvolutionOpenCLKernel::Init() { TILES_XY_ = TILES_X_ * TILES_Y_; use_winograd_ = UseWinograd4x4To6x6(); - std::vector vpara{IH_, - IW_, - OH_, - OW_, - KH_, - KW_, - CI_SLICES_, - CO_SLICES_, - param->stride_h_, - param->stride_w_, - param->pad_u_, - param->pad_l_, - param->pad_d_, - param->pad_r_}; - std::string code_id; - for (auto &iv : vpara) { - code_id += "_" + std::to_string(iv); - } - - std::vector vflag{ - use_fp16_, - op_format_ == schema::Format_NC4HW4, - op_format_ == schema::Format_NHWC4, - param->act_type_ == ActType_Relu6, - param->act_type_ == ActType_Relu, - param->pad_u_ || param->pad_d_, - OW_ % 2 == 1, - OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE, - }; - unsigned int init_count = 0; - for (size_t i = 0; i < vflag.size(); ++i) { - init_count |= ((unsigned int)vflag[i]) << i; - } - - code_id += "_" + std::to_string(init_count); - // build kernel + auto code_id = get_code_id(); + std::string program_name; if (use_winograd_) { MS_LOG(DEBUG) << "use winograd"; - std::string program_name; program_name = "Winograd4x4To36" + code_id; ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36()); ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options); @@ -120,7 +89,7 @@ int ConvolutionOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4()); ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options); } else { - std::string program_name = "convolution" + code_id; + program_name = "Convolution" + code_id; std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4(); ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options); @@ -281,18 +250,18 @@ int ConvolutionOpenCLKernel::InitBuffer() { int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { size_t im_dst_x, im_dst_y; if (in_tensors_[0]->GetFormat() == Format_NHWC4) { - if (out_tensors_[0]->Width() * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { + if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { { - im_dst_x = out_tensors_[0]->Width() * CO_SLICES_; - im_dst_y = out_tensors_[0]->Height(); + im_dst_y = batch_size_ * OH_; + im_dst_x = OW_ * CO_SLICES_; } } else { - im_dst_x = out_tensors_[0]->Height() * CO_SLICES_; - im_dst_y = out_tensors_[0]->Width(); + im_dst_y = OW_; + im_dst_x = batch_size_ * OH_ * CO_SLICES_; } } else { - im_dst_y = out_tensors_[0]->Height() * CO_SLICES_; - im_dst_x = out_tensors_[0]->Width(); + im_dst_y = batch_size_ * CO_SLICES_ * OH_; + im_dst_x = OW_; } size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; img_size->clear(); @@ -374,6 +343,8 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNHWC4() { std::string code; code += "#define CI_TILE 4\n"; code += "#define CO_TILE 4\n\n"; + code += "#define N " + std::to_string(batch_size_) + "\n"; + code += "#define N_OH " + std::to_string(batch_size_ * OH_) + "\n"; code += "#define CI " + std::to_string(CI_ALIGN) + "\n"; code += "#define IH " + std::to_string(IH_) + "\n"; code += "#define IW " + std::to_string(IW_) + "\n"; @@ -404,13 +375,22 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNHWC4() { " __global FLT4 *bias)" "{\n"; + code += " int n_oh = get_global_id(0); // [0, N*OH)\n"; + if (batch_size_ == 1) { + code += " #define n 0\n"; + code += " int oh = n_oh;\n"; + } else { + code += " int n = n_oh / " + std::to_string(OH_) + ";\n"; + code += " int oh = n_oh % " + std::to_string(OH_) + ";\n"; + } + code += - " int ow = get_global_id(0); // [0, OW)\n" - " int oh = get_global_id(1); // [0, OH)\n" + " int ow = get_global_id(1); // [0, OW)\n" " int co_slice = get_global_id(2); // [0, UP_DIV(CO, CO_TILE) )\n" "\n" - " if (oh >= OH || ow >= OW || co_slice >= CO_SLICES)\n" + " if (n_oh >= N_OH || ow >= OW || co_slice >= CO_SLICES) {\n" " return;\n" + " }\n" "\n" " FLT4 out0_c4 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n" " __global FLT4 *w0_ic1_oc4 = weight + co_slice * KH * KW * CI_SLICES * CI_TILE;\n"; @@ -426,7 +406,10 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNHWC4() { " {\n" " for (int ci_slice = 0; ci_slice < CI_SLICES; ci_slice++)\n" " {\n"; - code += "FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(iw * CI_SLICES + ci_slice, ih)); // NHWC4: H WC\n\n"; + + code += + "FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(iw * CI_SLICES + ci_slice, n * IH + ih)); // NHWC4: NH WC\n\n"; + code += " out0_c4 += w0_ic1_oc4[0] * in_c4.x;\n" " out0_c4 += w0_ic1_oc4[1] * in_c4.y;\n" @@ -448,11 +431,10 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNHWC4() { } else if (param->act_type_ == ActType_Relu6) { code += " out0_c4_bias = clamp(out0_c4_bias, (FLT4)(0.0f), (FLT4)(6.0f));\n"; } - if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { - code += " WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, oh), out0_c4_bias);// NHWC4: H WC\n}"; + code += " WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, n_oh), out0_c4_bias);// NHWC4: NH WC\n}"; } else { - code += " WRITE_IMAGE(output, (int2)(oh * CO_SLICES + co_slice, ow), out0_c4_bias);// NHWC4: H WC\n}"; + code += " WRITE_IMAGE(output, (int2)(n_oh * CO_SLICES + co_slice, ow), out0_c4_bias);\n}"; } return code; } @@ -480,14 +462,26 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNC4HW4() { " __global FLT4 *bias,\n" " const int4 input_shape,\n" " const int4 output_shape)\n" - "{\n" - " int ow = get_global_id(0) * 2;\n" - " int oh = get_global_id(1);\n" + "{\n"; + + code += " int n_oh = get_global_id(0); // [0, N*OH)\n"; + if (batch_size_ == 1) { + code += " #define n 0\n"; + code += " int oh = n_oh;\n"; + } else { + code += " int n = n_oh / " + std::to_string(OH_) + ";\n"; + code += " int oh = n_oh % " + std::to_string(OH_) + ";\n"; + } + + code += + " int ow = get_global_id(1) * 2;\n" " int co_slice = get_global_id(2);\n" "\n" " int CI_SLICES = input_shape.w;\n" " int CO_SLICES = output_shape.w;\n\n"; + code += " #define N " + std::to_string(batch_size_) + "\n"; + code += " #define N_OH " + std::to_string(batch_size_ * OH_) + "\n"; code += " #define IH " + std::to_string(IH_) + "\n"; code += " #define IW " + std::to_string(IW_) + "\n"; code += " #define OH " + std::to_string(OH_) + "\n"; @@ -500,8 +494,9 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNC4HW4() { code += " #define padLeft " + std::to_string(padLeft) + "\n\n"; code += - " if (oh >= OH || ow >= OW || co_slice >= CO_SLICES)\n" - " return;\n\n"; + " if (n_oh >= N_OH || ow >= OW || co_slice >= CO_SLICES) {\n" + " return;\n" + " }\n"; bool check_ow = (OW_ % 2) == 1; if (check_ow) { @@ -539,12 +534,12 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNC4HW4() { " int iw1 = kw + (ow + 1) * strideW - padLeft;\n" " for (int ci_slice = 0; ci_slice < CI_SLICES; ci_slice++)\n" " {\n" - " FLT4 in0 = READ_IMAGE(input, smp_zero, (int2)(iw0, ci_slice * IH + ih));\n" + " FLT4 in0 = READ_IMAGE(input, smp_zero, (int2)(iw0, (n * CI_SLICES + ci_slice) * IH + ih));\n" " out0 += w[0] * in0.x;\n" " out0 += w[1] * in0.y;\n" " out0 += w[2] * in0.z;\n" " out0 += w[3] * in0.w;\n" - " FLT4 in1 = READ_IMAGE(input, smp_zero, (int2)(iw1, ci_slice * IH + ih));\n" + " FLT4 in1 = READ_IMAGE(input, smp_zero, (int2)(iw1, (n * CI_SLICES + ci_slice) * IH + ih));\n" " out1 += w[0] * in1.x;\n" " out1 += w[1] * in1.y;\n" " out1 += w[2] * in1.z;\n" @@ -558,7 +553,7 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNC4HW4() { " {\n" " for (int ci_slice = 0; ci_slice < CI_SLICES; ci_slice++)\n" " {\n" - " FLT4 in0 = READ_IMAGE(input, smp_zero, (int2)(iw0, ci_slice * IH + ih));\n" + " FLT4 in0 = READ_IMAGE(input, smp_zero, (int2)(iw0, (n * CI_SLICES + ci_slice) * IH + ih));\n" " out0 += w[0] * in0.x;\n" " out0 += w[1] * in0.y;\n" " out0 += w[2] * in0.z;\n" @@ -585,7 +580,7 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNC4HW4() { } else if (param->act_type_ == ActType_Relu6) { code += " out0 = clamp(out0, (FLT4)(0.0f), (FLT4)(6.0f));\n"; } - code += " WRITE_IMAGE(output, (int2)(ow + 0, co_slice * OH + oh), out0);\n"; + code += " WRITE_IMAGE(output, (int2)(ow + 0, (n * CO_SLICES + co_slice) * OH + oh), out0);\n"; if (check_ow) { code += @@ -598,7 +593,7 @@ std::string ConvolutionOpenCLKernel::CodeGenConvolutionNC4HW4() { } else if (param->act_type_ == ActType_Relu6) { code += " out1 = clamp(out1, (FLT4)(0.0f), (FLT4)(6.0f));\n"; } - code += " WRITE_IMAGE(output, (int2)(ow + 1, co_slice * OH + oh), out1);\n"; + code += " WRITE_IMAGE(output, (int2)(ow + 1, (n * CO_SLICES + co_slice) * OH + oh), out1);\n"; if (check_ow) { code += "}\n"; } @@ -860,7 +855,7 @@ int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector *global, std size_t max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime_->Device())()); const size_t max_z_size = std::min(16, max_work_item_sizes[2]); - size_t global_h = UP_DIV(OH_, work_group_size[0]) * work_group_size[0]; + size_t global_nh = UP_DIV(batch_size_ * OH_, work_group_size[0]) * work_group_size[0]; size_t global_w = UP_DIV(OW_, work_group_size[1]) * work_group_size[1]; size_t global_c = UP_DIV(CO_SLICES_, work_group_size[2]) * work_group_size[2]; @@ -871,9 +866,9 @@ int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector *global, std } size_t local_hw_size = std::min(256, max_work_group_size) / local_c; size_t local_w = std::min(global_w, local_hw_size); - size_t local_h = std::min(local_hw_size / local_w, global_h); - if (local_h == global_h && global_h % 2 == 0) { - local_h = global_h / 2; + size_t local_nh = std::min(local_hw_size / local_w, global_nh); + if (local_nh == global_nh && global_nh % 2 == 0) { + local_nh = global_nh / 2; } if (op_format_ == Format_NHWC4) { @@ -883,22 +878,21 @@ int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector *global, std } global->clear(); + global->push_back(UP_DIV(batch_size_ * OH_, local_nh) * local_nh); global->push_back(UP_DIV(OW_, local_w) * local_w); - global->push_back(UP_DIV(OH_, local_h) * local_h); global->push_back(UP_DIV(CO_SLICES_, local_c) * local_c); local->clear(); + local->push_back(local_nh); local->push_back(local_w); - local->push_back(local_h); local->push_back(local_c); if (op_format_ == Format_NC4HW4) { // calculate 2 FLT4 along width per work-item - global->at(0) = UP_DIV(global->at(0), 2); - if (local->at(0) > global->at(0)) { - local->at(0) = global->at(0); + global->at(1) = UP_DIV(global->at(1), 2); + if (local->at(1) > global->at(1)) { + local->at(1) = global->at(1); } } - return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h index 41937c3b851..f7306a548db 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h @@ -41,6 +41,7 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { private: bool use_fp16_ = false; + int batch_size_{}; int CI_{}; int IH_{}; int IW_{}; @@ -84,11 +85,40 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { const bool attr_valid = param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 && param->pad_u_ == 1 && param->pad_d_ == 1 && param->pad_l_ == 1 && param->pad_r_ == 1 && param->dilation_h_ == 1 && param->dilation_w_ == 1 && IH_ == OH_ && - IW_ == OW_; + IW_ == OW_ && batch_size_ == 1; const bool channel_good = CI_SLICES_ >= 12 && CO_SLICES_ >= 12; const bool hw_good = TILES_X_ * TILES_Y_ >= 16; return attr_valid && channel_good && hw_good; } + + std::string get_code_id() { + auto param = reinterpret_cast(op_parameter_); + std::vector vpara{batch_size_, + CI_, + IH_, + IW_, + CO_, + OH_, + OW_, + KH_, + KW_, + param->stride_h_, + param->stride_w_, + param->pad_u_, + param->pad_l_, + param->pad_d_, + param->pad_r_, + param->dilation_h_, + param->dilation_w_, + use_fp16_, + op_format_, + param->act_type_}; + std::string code_id; + for (auto &iv : vpara) { + code_id += "_" + std::to_string(iv); + } + return code_id; + } }; } // namespace mindspore::kernel diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc index c0ac7d6fe2a..66f90d6f938 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc @@ -96,7 +96,7 @@ void CompareOutput(Tensor *output, const float *expect_data, const float atol) { for (int l = 0; l < L; ++l) { for (int m = 0; m < M; ++m) { auto err = std::fabs(output_data[cn] - expect_data[cn]); - if (first_err_idx == -1 && max_err > atol) { + if (first_err_idx == -1 && err > atol) { first_err_idx = cn; } if (err > max_err) { @@ -128,22 +128,10 @@ void CompareOutput(Tensor *output, const float *expect_data, const float atol) { } } -Format get_op_format(Format input_format) { - switch (input_format) { - case Format_NHWC: - case Format_NHWC4: - return Format_NHWC4; - case Format_NCHW: - return Format_NHWC4; - default: - return Format_NC4HW4; - } -} - -void TEST_MAIN(const std::string &attr, Format input_format, Format output_format, const TypeId data_type, - const float atol, const float *input_data, const float *weight_data, const float *bias_data, - const float *expect_data) { - auto param = std::make_unique(); +void TEST_MAIN(const std::string &attr, Format input_format, Format output_format, Format op_format, + const TypeId data_type, const float atol, const float *input_data, const float *weight_data, + const float *bias_data, const float *expect_data) { + auto param = static_cast(malloc(sizeof(ConvParameter))); if (param == nullptr) { MS_LOG(ERROR) << "ConvParameter create error."; return; @@ -179,13 +167,15 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma LoadData(&bias, bias_data); MS_LOG(DEBUG) << "create OpenCL Kernel"; - auto kernel = - ConvolutionOpenCLKernel(reinterpret_cast(param.release()), {&input, &weight, &bias}, {&output}); - kernel.SetFormatType(get_op_format(input_format)); - kernel.Init(); + std::vector inputs{&input, &weight, &bias}; + std::vector outputs{&output}; + auto kernel = std::make_unique(reinterpret_cast(param), inputs, outputs); + kernel->SetFormatType(op_format); + kernel->Init(); MS_LOG(DEBUG) << "create SubGraph"; - auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel({&input}, {&output}, {&kernel}, {&kernel}, {&kernel}); + std::vector kernels{kernel.release()}; + auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel({&input}, {&output}, kernels, kernels, kernels); if (sub_graph == nullptr) { return; } @@ -203,8 +193,8 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma delete sub_graph; } -void TEST_MAIN(const std::string &attr, Format input_format, Format output_format, const TypeId data_type, - const float atol, const std::string &data_path) { +void TEST_MAIN(const std::string &attr, Format input_format, Format output_format, Format op_format, + const TypeId data_type, const float atol, const std::string &data_path) { auto testcase_path = data_path + "/" + attr + "/"; std::map format_str{ {Format_NCHW, "NCHW"}, {Format_NHWC, "NHWC"}, {Format_NHWC4, "NHWC4"}, {Format_NC4HW4, "NC4HW4"}}; @@ -227,27 +217,29 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma printf("bias [0-3]: %7.3f %7.3f %7.3f\n", bias_data[0], bias_data[1], bias_data[2]); printf("expect[0-3]: %7.3f %7.3f %7.3f\n", expect_data[0], expect_data[1], expect_data[2]); - TEST_MAIN(attr, input_format, output_format, data_type, atol, input_data, weight_data, bias_data, expect_data); + TEST_MAIN(attr, input_format, output_format, op_format, data_type, atol, input_data, weight_data, bias_data, + expect_data); } TEST_F(TestConvolutionOpenCL, in1x224x224x3_out1x112x112x32_k33_s22_p0101) { std::string attr = "inputNHWC_1x224x224x3_outputNHWC_1x112x112x32_kernelHW_3x3_strideHW_2x2_padTopBottomLeftRight_0x1x0x1_dilationHW_" "1x1"; - TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, kNumberTypeFloat32, 2e-6f, "testcases/mobilenetv2_fp32/"); - TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, kNumberTypeFloat16, 2e-2f, "testcases/mobilenetv2_fp32/"); - TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, kNumberTypeFloat32, 2e-6f, "testcases/mobilenetv2_fp32/"); - TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, kNumberTypeFloat16, 2e-2f, "testcases/mobilenetv2_fp32/"); + // TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, Format_NHWC4, kNumberTypeFloat32, 2e-6f, + // "testcases/mobilenetv2_fp32/"); TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, Format_NHWC4, kNumberTypeFloat16, + // 2e-2f, "testcases/mobilenetv2_fp32/"); + TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, Format_NHWC4, kNumberTypeFloat32, 2e-6f, "testcases/mobilenetv2_fp32/"); + TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, Format_NHWC4, kNumberTypeFloat16, 2e-2f, "testcases/mobilenetv2_fp32/"); } TEST_F(TestConvolutionOpenCL, winograd_inputNHWC_1x16x256x96_outputNHWC_1x16x256x80) { std::string attr = "inputNHWC_1x16x256x96_outputNHWC_1x16x256x80_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_dilationHW_" "1x1"; - TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, kNumberTypeFloat32, 1e-4f, "testcases/test_fp32/"); - TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, kNumberTypeFloat16, 0.6f, "testcases/test_fp32/"); - TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, kNumberTypeFloat32, 1e-4f, "testcases/test_fp32/"); - TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, kNumberTypeFloat16, 0.6f, "testcases/test_fp32/"); + // TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, Format_NHWC4, kNumberTypeFloat32, 1e-4f, "testcases/test_fp32/"); + // TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, Format_NHWC4, kNumberTypeFloat16, 0.6f, "testcases/test_fp32/"); + TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, Format_NHWC4, kNumberTypeFloat32, 1e-4f, "testcases/test_fp32/"); + TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, Format_NHWC4, kNumberTypeFloat16, 0.6f, "testcases/test_fp32/"); } TEST_F(TestConvolutionOpenCL, simple_test0_NHWC) { @@ -257,9 +249,12 @@ TEST_F(TestConvolutionOpenCL, simple_test0_NHWC) { float weight_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; float bias_data[] = {0.0f, 0.0f}; float expect_data[] = {1.0f, 1.0f, 5.0f, 5.0f, 9.0f, 9.0f, 13.0f, 13.0f}; - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, + expect_data); } + TEST_F(TestConvolutionOpenCL, simple_test0_NCHW) { std::string attr = "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; @@ -267,8 +262,10 @@ TEST_F(TestConvolutionOpenCL, simple_test0_NCHW) { float weight_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; float bias_data[] = {0.0f, 0.0f}; float expect_data[] = {1.0f, 5.0f, 9.0f, 13.0f, 1.0f, 5.0f, 9.0f, 13.0f}; - TEST_MAIN(attr, Format_NCHW, Format_NCHW, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, Format_NCHW, Format_NCHW, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); + TEST_MAIN(attr, Format_NCHW, Format_NCHW, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NCHW, Format_NCHW, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, + expect_data); } TEST_F(TestConvolutionOpenCL, simple_test0_NHWC4_and_NC4HW4) { @@ -279,14 +276,14 @@ TEST_F(TestConvolutionOpenCL, simple_test0_NHWC4_and_NC4HW4) { float bias_data[] = {0.0f, 0.0f}; float expect_data[] = {1.0f, 1.0f, 0.0f, 0.0f, 5.0f, 5.0f, 0.0f, 0.0f, 9.0f, 9.0f, 0.0f, 0.0f, 13.0f, 13.0f, 0.0f, 0.0f}; - TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, - expect_data); - TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, - expect_data); - TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, - expect_data); - TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, - expect_data); + TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, + bias_data, expect_data); + TEST_MAIN(attr, Format_NHWC4, Format_NHWC4, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, + bias_data, expect_data); + TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, + bias_data, expect_data); + TEST_MAIN(attr, Format_NC4HW4, Format_NC4HW4, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, + bias_data, expect_data); } TEST_F(TestConvolutionOpenCL, simple_test1) { @@ -296,31 +293,56 @@ TEST_F(TestConvolutionOpenCL, simple_test1) { float weight_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; float bias_data[] = {0.5f, -0.5f}; float expect_data[] = {2.5f, 3.5f, 8.5f, 17.5f, 14.5f, 31.5f, 20.5f, 45.5f}; - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, + expect_data); } TEST_F(TestConvolutionOpenCL, simple_test2) { std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x1_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; + "inputNHWC_1x2x2x2_outputNHWC_1x2x2x1_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; float weight_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; float bias_data[] = {0.0f}; float expect_data[] = {28.0f, 18.0f, 22.0f, 13.0f}; - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, + expect_data); } TEST_F(TestConvolutionOpenCL, simple_test3) { std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; + "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; float weight_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; float bias_data[] = {0.5f, -0.5f}; float expect_data[] = {168.5f, 391.5f, 80.5f, 223.5f, 60.5f, 235.5f, 20.5f, 123.5f}; - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, Format_NHWC, Format_NHWC, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, + expect_data); +} + +TEST_F(TestConvolutionOpenCL, simple_test3_batch2) { + std::string attr = + "inputNHWC_2x2x2x2_outputNHWC_2x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; + float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; + float weight_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; + float bias_data[] = {0.5f, -0.5f}; + float expect_data[] = {168.5f, 391.5f, 80.5f, 223.5f, 60.5f, 235.5f, 20.5f, 123.5f, + 168.5f, 391.5f, 80.5f, 223.5f, 60.5f, 235.5f, 20.5f, 123.5f}; + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, + expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NC4HW4, kNumberTypeFloat32, 1e-3f, input_data, weight_data, + bias_data, expect_data); + TEST_MAIN(attr, Format_NHWC, Format_NHWC, Format_NC4HW4, kNumberTypeFloat16, 1e-6f, input_data, weight_data, + bias_data, expect_data); } } // namespace mindspore