From 89ca31a03db26eef86cec6bc01fa71d9263a67f1 Mon Sep 17 00:00:00 2001 From: chenzupeng Date: Fri, 21 Aug 2020 11:30:12 +0800 Subject: [PATCH] fix bug in matmul testcase --- .gitignore | 3 + .../kernel/opencl/kernel/activation.cc | 4 +- .../kernel/opencl/kernel/arithmetic.cc | 4 +- .../runtime/kernel/opencl/kernel/batchnorm.cc | 4 +- .../kernel/opencl/kernel/caffe_prelu.cc | 4 +- .../runtime/kernel/opencl/kernel/concat.cc | 4 +- .../kernel/opencl/kernel/conv2d_transpose.cc | 4 +- .../kernel/opencl/kernel/convolution.cc | 5 +- .../kernel/opencl/kernel/depthwise_conv2d.cc | 3 +- .../runtime/kernel/opencl/kernel/matmul.cc | 135 +++++++++--------- .../src/runtime/kernel/opencl/kernel/matmul.h | 10 +- .../runtime/kernel/opencl/kernel/pooling2d.cc | 4 +- .../src/runtime/kernel/opencl/kernel/prelu.cc | 4 +- .../runtime/kernel/opencl/kernel/reshape.cc | 10 +- .../runtime/kernel/opencl/kernel/softmax.cc | 6 +- .../runtime/kernel/opencl/kernel/transpose.cc | 10 +- .../src/runtime/kernel/opencl/opencl_kernel.h | 8 +- .../kernel/opencl/subgraph_opencl_kernel.cc | 23 ++- .../kernel/opencl/conv2d_transpose_tests.cc | 3 +- .../src/runtime/kernel/opencl/matmul_tests.cc | 23 +-- .../runtime/kernel/opencl/transpose_tests.cc | 11 +- 21 files changed, 165 insertions(+), 117 deletions(-) diff --git a/.gitignore b/.gitignore index 806e8e7b908..52c3e2fb73c 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,6 @@ mindspore/.commit_id # lite test file mindspore/lite/test/do_test/ + +# lite opencl compile file +*.cl.inc diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index 61248031735..80beb6ed54f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -67,7 +67,9 @@ int ActivationOpenClKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << op_parameter_->name_ << " init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index d52b68ffc47..633bc67bc6f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -122,7 +122,9 @@ int ArithmeticOpenCLKernel::Init() { if (error_code != RET_OK) { return error_code; } - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); Image2dGetWorkGroupSize(); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index b1b5cbb3674..a53a4cd9845 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -56,7 +56,9 @@ int BatchNormOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/caffe_prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/caffe_prelu.cc index 9db594a30d9..553753a3f04 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/caffe_prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/caffe_prelu.cc @@ -63,7 +63,9 @@ int CaffePReluOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << program_name << " Init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index d2b4fadce79..048ab3d5896 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -83,7 +83,9 @@ int ConcatOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); } - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index 027bcd664e3..f5f7ae1e0a7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -51,7 +51,9 @@ int Conv2dTransposeOpenCLKernel::Init() { ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif PadWeight(); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index 48dd48e5fed..5a71b919941 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -92,8 +92,11 @@ int ConvolutionOpenCLKernel::Init() { } this->InitBuffer(); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); + MS_LOG(DEBUG) << "Convolution Init Done!"; return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 421a1dce00c..d49006f8ca3 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -42,7 +42,8 @@ int DepthwiseConv2dOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::string kernel_name = "DepthwiseConv2d"; auto in_format = in_tensors_[0]->GetFormat(); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_format; + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(in_format); if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { MS_LOG(ERROR) << "input format(" << in_format << ") " diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index b724e5f7e12..db2bbd638c9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -44,26 +44,28 @@ int MatMulOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - auto weight_format = in_tensors_[1]->GetFormat(); - if (weight_format != schema::Format_NHWC) { - MS_LOG(ERROR) << "weight format(" << weight_format << ") " - << "format not support!"; - return 1; + int ci, co; + if (in_tensors_[1]->shape().size() == 2) { + ci = in_tensors_[1]->shape()[1]; + co = in_tensors_[1]->shape()[0]; + } else { + ci = in_tensors_[1]->shape()[3]; + co = in_tensors_[1]->shape()[0]; } - int ci = in_tensors_[1]->shape()[3]; - int co = in_tensors_[1]->shape()[0]; - sizeCI = {ci, UP_DIV(ci, 4)}; - sizeCO = {co, UP_DIV(co, 4)}; - auto allocator = ocl_runtime->GetAllocator(); - padWeight_ = reinterpret_cast(allocator->Malloc(sizeCI.s[1] * sizeCO.s[1] * 16 * sizeof(FLOAT_T))); - padWeight_ = reinterpret_cast(allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true)); - bias_ = reinterpret_cast(allocator->Malloc(sizeCO.s[1] * 4 * sizeof(FLOAT_T))); - bias_ = reinterpret_cast(allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true)); + + sizeCI = {ci, UP_DIV(ci, C4NUM)}; + sizeCO = {co, UP_DIV(co, C4NUM)}; PadWeight(); - allocator->UnmapBuffer(padWeight_); - allocator->UnmapBuffer(bias_); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); + if (out_tensors_[0]->shape().size() == 2) { + out_ori_format_ = schema::Format_NC; + out_tensors_[0]->SetFormat(schema::Format_NC4); + in_ori_format_ = schema::Format_NC; + in_tensors_[0]->SetFormat(schema::Format_NC4); + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return 0; } @@ -71,16 +73,22 @@ int MatMulOpenCLKernel::Init() { int MatMulOpenCLKernel::ReSize() { return 0; } void MatMulOpenCLKernel::PadWeight() { - auto origin_weight = reinterpret_cast(in_tensors_.at(kWeightIndex)->Data()); + auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); + padWeight_ = + reinterpret_cast(allocator->Malloc(sizeCI.s[1] * sizeCO.s[1] * C4NUM * C4NUM * sizeof(FLOAT_t))); + padWeight_ = reinterpret_cast(allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true)); + + auto origin_weight = reinterpret_cast(in_tensors_.at(kWeightIndex)->Data()); int divCI = sizeCI.s[1]; int divCO = sizeCO.s[1]; + int co = sizeCO.s[0]; int index = 0; for (int i = 0; i < divCI; ++i) { for (int j = 0; j < divCO; ++j) { - for (int k = 0; k < 4; ++k) { - for (int l = 0; l < 4; ++l) { - int src_x = i * 4 + l; - int src_y = j * 4 + k; + for (int k = 0; k < C4NUM; ++k) { + for (int l = 0; l < C4NUM; ++l) { + int src_x = i * C4NUM + l; + int src_y = j * C4NUM + k; if (src_x < sizeCI.s[0] && src_y < sizeCO.s[0]) { padWeight_[index++] = origin_weight[src_y * sizeCI.s[0] + src_x]; } else { @@ -90,60 +98,55 @@ void MatMulOpenCLKernel::PadWeight() { } } } - if (hasBias_) { - memcpy(bias_, in_tensors_[2]->Data(), sizeof(FLOAT_T) * sizeCO.s[0]); - for (int i = sizeCO.s[0]; i < sizeCO.s[1] * 4; i++) { - bias_[i] = 0; - } - } else { - for (int i = 0; i < sizeCO.s[1] * 4; i++) { - bias_[i] = 0; - } + + size_t im_dst_x, im_dst_y; + im_dst_x = divCO; + im_dst_y = 1; +#ifdef ENABLE_FP16 + size_t img_dtype = CL_HALF_FLOAT; +#else + size_t img_dtype = CL_FLOAT; +#endif + std::vector img_size{im_dst_x, im_dst_y, img_dtype}; + bias_ = reinterpret_cast(allocator->Malloc(im_dst_x * im_dst_y * C4NUM * sizeof(FLOAT_t), img_size)); + bias_ = reinterpret_cast(allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true)); + memset(bias_, 0x00, divCO * C4NUM * sizeof(FLOAT_t)); + if (in_tensors_.size() >= 3) { + memcpy(bias_, in_tensors_[2]->Data(), co * sizeof(FLOAT_t)); } + allocator->UnmapBuffer(bias_); +} + +int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { + size_t im_dst_x, im_dst_y; + im_dst_x = sizeCO.s[1]; + im_dst_y = 1; +#ifdef ENABLE_FP16 + size_t img_dtype = CL_HALF_FLOAT; +#else + size_t img_dtype = CL_FLOAT; +#endif + img_size->clear(); + std::vector vec{im_dst_x, im_dst_y, img_dtype}; + *img_size = vec; + return RET_OK; } int MatMulOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - std::vector shapex = in_tensors_[0]->shape(); - int n = shapex[0]; - if (n > 1) { - MS_LOG(ERROR) << "MatMul n > 1 not supported!"; - return 1; - } auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); // local size should less than MAX_GROUP_SIZE std::vector local = {64, 4}; std::vector global = {UP_ROUND(sizeCO.s[1], local[0]), 4}; - - cl::ImageFormat image_format; - { - image_format.image_channel_order = CL_RGBA; -#ifdef ENABLE_FP16 - image_format.image_channel_data_type = CL_HALF_FLOAT; -#else - image_format.image_channel_data_type = CL_FLOAT; -#endif - } - cl_int in_error_code, in_error_code_weight, in_error_code_bias, out_error_code; - cl::Image2D img_input(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format, sizeCI.s[1], 1, - 0, in_tensors_[0]->Data(), &in_error_code); - cl::Image2D img_bias(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format, sizeCO.s[1], 1, - 0, bias_, &in_error_code_bias); - cl::Image2D img_out(*ocl_runtime->Context(), CL_MEM_WRITE_ONLY, image_format, sizeCO.s[1], 1, 0, nullptr, - &out_error_code); - - ocl_runtime->SetKernelArg(kernel_, 0, img_input); - ocl_runtime->SetKernelArg(kernel_, 1, padWeight_); - ocl_runtime->SetKernelArg(kernel_, 2, img_bias); - ocl_runtime->SetKernelArg(kernel_, 3, img_out); - ocl_runtime->SetKernelArg(kernel_, 4, sizeCI); - ocl_runtime->SetKernelArg(kernel_, 5, sizeCO); - ocl_runtime->SetKernelArg(kernel_, 6, hasBias_ ? 1 : 0); + int arg_count = 0; + ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->Data()); + ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_); + ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_); + ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->Data()); + ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCI); + ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCO); + ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); ocl_runtime->RunKernel(kernel_, global, local, nullptr); - auto origin = cl::array{0, 0, 0}; - auto region = cl::array{(size_t)(sizeCO.s[1]), 1, 1}; - ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(img_out, CL_TRUE, origin, region, 0, 0, - out_tensors_[0]->Data()); return 0; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h index ffd279e28f9..f5f90d63071 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h @@ -23,11 +23,6 @@ #include "src/runtime/kernel/arm/nnacl/conv_parameter.h" #include "src/runtime/opencl/opencl_runtime.h" -#ifdef ENABLE_FP16 -using FLOAT_T = float16_t; -#else -using FLOAT_T = float; -#endif namespace mindspore::kernel { @@ -44,11 +39,12 @@ class MatMulOpenCLKernel : public OpenCLKernel { int ReSize() override; int Run() override; void PadWeight(); + int GetImageSize(size_t idx, std::vector *img_size) override; private: cl::Kernel kernel_; - FLOAT_T *padWeight_; - FLOAT_T *bias_; + FLOAT_t *padWeight_; + FLOAT_t *bias_; bool hasBias_ = false; cl_int2 sizeCI; cl_int2 sizeCO; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 276cd699bd4..27b4ff2800b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -73,7 +73,9 @@ int PoolingOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 7ff7993035a..02eae731646 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -46,7 +46,9 @@ int PReluOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << program_name << " init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index e58013e493f..a5153baaf36 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -43,8 +43,14 @@ int ReshapeOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(schema::Format_NHWC); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); + out_tensors_[0]->SetFormat(schema::Format_NHWC4); + if (out_tensors_[0]->shape().size() == 2) { + out_ori_format_ = schema::Format_NC; + out_tensors_[0]->SetFormat(schema::Format_NC4); + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index a3e82228ac5..caa6a57f956 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -123,10 +123,12 @@ int SoftmaxOpenCLKernel::Init() { runtime_->LoadSource(program_name, source); runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - ori_format_ = out_tensors_[0]->GetFormat(); + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); if (!is_image_out_) { - ori_format_ = schema::Format_NC; + out_ori_format_ = schema::Format_NC; out_tensors_[0]->SetFormat(schema::Format_NC); } MS_LOG(DEBUG) << kernel_name << " Init Done!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index c57dcf72ac2..328db29850b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -49,17 +49,13 @@ int TransposeOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - auto input_format = in_tensors_[0]->GetFormat(); - if (input_format != schema::Format_NHWC4) { - MS_LOG(ERROR) << "input format(" << input_format << ") " - << "format not support!"; - return RET_ERROR; - } if ((in_tensors_[0]->Height() * in_tensors_[0]->Width()) % 4 != 0) { MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; return RET_ERROR; } - ori_format_ = schema::Format_NCHW; + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(schema::Format_NHWC4); + out_ori_format_ = schema::Format_NCHW; out_tensors_[0]->SetFormat(schema::Format_NCHW); if (!is_image_out_) { out_mem_type_ = OpenCLMemType::BUF; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 295638bc58e..167841e0afc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -35,7 +35,7 @@ class OpenCLKernel : public LiteKernel { public: explicit OpenCLKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs) - : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {} + : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {} virtual int Init() { return -1; } virtual int Prepare() { return -1; } @@ -49,11 +49,13 @@ class OpenCLKernel : public LiteKernel { } OpenCLMemType GetMemType() { return out_mem_type_; } void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } - schema::Format GetOriFormat() { return ori_format_;} + schema::Format GetInOriFormat() { return in_ori_format_; } + schema::Format GetOutOriFormat() { return out_ori_format_; } protected: OpenCLMemType out_mem_type_{OpenCLMemType::IMG}; - schema::Format ori_format_{schema::Format_NHWC4}; + schema::Format in_ori_format_{schema::Format_NHWC}; + schema::Format out_ori_format_{schema::Format_NHWC4}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index 93f14131e18..72859c98f11 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -38,10 +38,10 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector(jv); - schema::Format ori_format = cur_opencl_op->GetOriFormat(); + schema::Format out_ori_format = cur_opencl_op->GetOutOriFormat(); auto tens = cur_opencl_op->out_tensors(); if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() && - tens[0]->GetFormat() == ori_format) { + tens[0]->GetFormat() == out_ori_format) { continue; } if (mem_type == OpenCLMemType::IMG) { @@ -53,14 +53,16 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector(in_kernels[i][0]); - schema::Format ori_format = cur_opencl_op->GetOriFormat(); + schema::Format out_ori_format = cur_opencl_op->GetOutOriFormat(); + schema::Format in_ori_format = cur_opencl_op->GetInOriFormat(); if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() && - in_tensors[i]->GetFormat() == ori_format) { + in_tensors[i]->GetFormat() == out_ori_format) { continue; } - auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i][0]->out_tensors()[0]->GetFormat() : ori_format; + auto dst_format = + (mem_type == OpenCLMemType::IMG) ? in_kernels[i][0]->in_tensors()[0]->GetFormat() : out_ori_format; auto src_format = - (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i][0]->out_tensors()[0]->GetFormat(); + (mem_type == OpenCLMemType::IMG) ? in_ori_format : in_kernels[i][0]->out_tensors()[0]->GetFormat(); lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor(); MS_ASSERT(new_tensor); if (new_tensor == nullptr) { @@ -80,7 +82,14 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector dst_shape{shape[0], shape[2], shape[3], shape[1]}; new_tensor->set_shape(shape); } - new_tensor->SetFormat(in_kernels[i][0]->out_tensors()[0]->GetFormat()); + if (mem_type == OpenCLMemType::IMG) { + new_tensor->SetFormat(dst_format); + in_tensors[i]->SetFormat(src_format); + } else { + new_tensor->SetFormat(src_format); + in_tensors[i]->SetFormat(dst_format); + } + out_tensors->emplace_back(new_tensor); #ifdef ENABLE_FP16 KernelKey desc{kGPU, kNumberTypeFloat16, schema::PrimitiveType_ToFormat}; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc index 288144b06da..4ec2d16697a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc @@ -161,7 +161,8 @@ TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) { // compare CompareOutputData(output_data, correct_data, oh * ow * co, 0.00001); - + inputs[0]->SetData(nullptr); + outputs[0]->SetData(nullptr); MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed"; lite::opencl::OpenCLRuntime::DeleteInstance(); } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc index 27ba92f0182..3e10b5ce485 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc @@ -31,6 +31,7 @@ class TestMatMulOpenCL : public mindspore::CommonTest { TEST_F(TestMatMulOpenCL, MatMulFp32) { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->Init(); + auto allocator = ocl_runtime->GetAllocator(); size_t input_size; int ci = 1280; int co = 1001; @@ -47,16 +48,16 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) { MS_LOG(ERROR) << "weight_data load error."; return; } - std::vector input_shape = {1, 1, 1, ci}; - auto tensor_x_ptr = std::make_unique(TypeId(kNumberTypeFloat32), input_shape); + std::vector input_shape = {1, ci}; + auto tensor_x_ptr = + std::make_unique(TypeId(kNumberTypeFloat32), input_shape, schema::Format_NC); auto tensor_x = tensor_x_ptr.get(); if (tensor_x == nullptr) { MS_LOG(ERROR) << "tensor_x create error."; return; } - tensor_x->SetData(input_data); - std::vector w_shape = {co, 1, 1, ci}; + std::vector w_shape = {co, ci}; auto tensor_w_ptr = std::make_unique(TypeId(kNumberTypeFloat32), w_shape); auto tensor_w = tensor_w_ptr.get(); if (tensor_w == nullptr) { @@ -65,8 +66,9 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) { } tensor_w->SetData(weight_data); - std::vector out_shape = {1, 1, 1, co}; - auto tensor_out_ptr = std::make_unique(TypeId(kNumberTypeFloat32), out_shape); + std::vector out_shape = {1, co}; + auto tensor_out_ptr = + std::make_unique(TypeId(kNumberTypeFloat32), out_shape, schema::Format_NC); auto tensor_out = tensor_out_ptr.get(); if (tensor_out == nullptr) { MS_LOG(ERROR) << "tensor_out create error."; @@ -81,6 +83,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) { return; } arith_kernel->Init(); + inputs[0]->MallocData(allocator); std::vector kernels{arith_kernel}; @@ -92,6 +95,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) { return; } pGraph->Init(); + memcpy(inputs[0]->Data(), input_data, input_size); pGraph->Run(); size_t output_size; @@ -108,9 +112,10 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) { std::cout << std::endl; // compare - CompareOutputData(output_data, correct_data, co, 0.00001); - - MS_LOG(INFO) << "TestMatMulFp32 passed"; + CompareOutputData(output_data, correct_data, co, 0.0001); + tensor_x->SetData(nullptr); + tensor_out->SetData(nullptr); lite::opencl::OpenCLRuntime::DeleteInstance(); + MS_LOG(INFO) << "TestMatMulFp32 passed"; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc index 5a5882da217..03516ff33f7 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc @@ -44,14 +44,15 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) { } std::vector input_shape = {1, h, w, c}; auto tensor_x_ptr = - std::make_unique(TypeId(kNumberTypeFloat32), input_shape, schema::Format_NHWC4); + std::make_unique(TypeId(kNumberTypeFloat32), input_shape, schema::Format_NHWC); auto tensor_x = tensor_x_ptr.get(); if (tensor_x == nullptr) { MS_LOG(ERROR) << "tensor_x create error."; return; } std::vector out_shape = {1, c, h, w}; - auto tensor_out_ptr = std::make_unique(TypeId(kNumberTypeFloat32), out_shape); + auto tensor_out_ptr = + std::make_unique(TypeId(kNumberTypeFloat32), out_shape, schema::Format_NCHW); auto tensor_out = tensor_out_ptr.get(); if (tensor_out == nullptr) { MS_LOG(ERROR) << "tensor_out create error."; @@ -102,7 +103,11 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) { // compare CompareOutputData(output_data, correct_data, h * w * c, 0.00001); - MS_LOG(INFO) << "Test TransposeFp32 passed"; + + inputs[0]->SetData(nullptr); + outputs[0]->SetData(nullptr); lite::opencl::OpenCLRuntime::DeleteInstance(); + + MS_LOG(INFO) << "Test TransposeFp32 passed"; } } // namespace mindspore