From 90f15df03743ef3bab61e97ffc620b42dd259682 Mon Sep 17 00:00:00 2001
From: VectorSL
Date: Mon, 20 Jul 2020 17:21:49 +0800
Subject: [PATCH] add int64-->fp16 and update conv pad

Cast int64 to float16 on GPU by widening to float32 first, register the
new int64 cast formats for AKG, and switch the Conv2d forward/backward
GPU kernels from the scalar "pad" attribute to the four-element
"pad_list", routing asymmetric padding through the explicit-pad path.

---
 mindspore/_akg/gpu/cast.py                  |  2 ++
 .../gpu/nn/conv2d_gpu_kernel.h              |  8 +++++---
 .../gpu/nn/conv2d_grad_filter_gpu_kernel.h  |  8 +++++---
 .../gpu/nn/conv2d_grad_input_gpu_kernel.h   |  8 +++++---
 mindspore/ops/_op_impl/akg/gpu/cast.py      |  3 +++
 tests/st/ops/gpu/test_cast_op.py            | 19 ++++++++++++++++++-
 6 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/mindspore/_akg/gpu/cast.py b/mindspore/_akg/gpu/cast.py
index d6b38b6e9b..dd7517f0a5 100644
--- a/mindspore/_akg/gpu/cast.py
+++ b/mindspore/_akg/gpu/cast.py
@@ -20,6 +20,8 @@ from _akg.topi.generic import schedule_elemwise
 
 def Cast(x, dst_type):
     """cast."""
+    if x.dtype == "int64" and dst_type == "float16":
+        x = cast.cast(x, "float32")
     return cast.cast(x, dst_type)
 
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
index 559665e422..c5e8a26801 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
@@ -109,12 +109,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     Set4DDesc(in_shape, filter_shape, output_shape);
     group_ = GetAttr<int>(kernel_node, "group");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
-    pad_height_ = GetAttr<int>(kernel_node, "pad");
-    pad_width_ = pad_height_;
+    auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
+    pad_height_ = pad_list[0];
+    pad_width_ = pad_list[2];
+    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t input_descriptor_real = nullptr;
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
+    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
       SetPad(in_shape, kernel_node);
       input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
     } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
index 6ffa028f98..ac4d127e43 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -113,12 +113,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     group_ = GetAttr<int>(kernel_node, "group");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_),
                                 "cudnnSetConvGroupCount failed");
-    pad_height_ = GetAttr<int>(kernel_node, "pad");
-    pad_width_ = pad_height_;
+    auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
+    pad_height_ = pad_list[0];
+    pad_width_ = pad_list[2];
+    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t x_desc_real = nullptr;
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
+    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
       SetPad(in_shape, kernel_node);
       x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
     } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
index 1c6fb1aa5c..e40bd6898f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -114,12 +114,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     group_ = GetAttr<int>(kernel_node, "group");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_),
                                 "cudnnSetConvGroupCount failed");
-    pad_height_ = GetAttr<int>(kernel_node, "pad");
-    pad_width_ = pad_height_;
+    auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
+    pad_height_ = pad_list[0];
+    pad_width_ = pad_list[2];
+    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t dx_desc_real = nullptr;
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
+    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
       SetPad(input_shape, kernel_node);
       dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
     } else {
diff --git a/mindspore/ops/_op_impl/akg/gpu/cast.py b/mindspore/ops/_op_impl/akg/gpu/cast.py
index 3c9ffa8974..68c280f348 100644
--- a/mindspore/ops/_op_impl/akg/gpu/cast.py
+++ b/mindspore/ops/_op_impl/akg/gpu/cast.py
@@ -50,6 +50,9 @@ cast_op_info = AkgGpuRegOp("Cast") \
     .dtype_format(DataType.I16_Default, DataType.I32_Default) \
     .dtype_format(DataType.I16_Default, DataType.I64_Default) \
     .dtype_format(DataType.I64_Default, DataType.F64_Default) \
+    .dtype_format(DataType.I64_Default, DataType.F32_Default) \
+    .dtype_format(DataType.I64_Default, DataType.F16_Default) \
+    .dtype_format(DataType.I64_Default, DataType.I32_Default) \
     .dtype_format(DataType.I16_Default, DataType.F32_Default) \
     .dtype_format(DataType.I16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F32_Default, DataType.I32_Default) \
diff --git a/tests/st/ops/gpu/test_cast_op.py b/tests/st/ops/gpu/test_cast_op.py
index b3b48fcfa0..d3c8543101 100644
--- a/tests/st/ops/gpu/test_cast_op.py
+++ b/tests/st/ops/gpu/test_cast_op.py
@@ -92,7 +92,7 @@ def test_cast2():
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_cast3():
-    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16))
+    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
     t0 = mstype.int32
     x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32))
     t1 = mstype.int32
@@ -342,3 +342,20 @@ def test_cast17():
     assert type0 == 'float32'
     type1 = output[1].asnumpy().dtype
     assert type1 == 'float16'
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_cast18():
+    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
+    t0 = mstype.float32
+    x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
+    t1 = mstype.float16
+
+    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
+    net = Net(t0, t1)
+    output = net(x0, x1)
+    type0 = output[0].asnumpy().dtype
+    assert type0 == 'float32'
+    type1 = output[1].asnumpy().dtype
+    assert type1 == 'float16'
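
Note on the cast change: _akg/gpu/cast.py routes int64 -> float16 through an
intermediate float32 cast, presumably because the underlying topi lowering has
no direct int64 -> float16 conversion. A minimal standalone sketch of the same
two-step idea, using NumPy in place of the topi cast (the helper name here is
illustrative, not the patched API):

    import numpy as np

    def cast_with_fallback(x, dst_type):
        # Direct int64 -> float16 conversion is assumed unsupported in the
        # target lowering, so widen to float32 first, then narrow to float16.
        if x.dtype == np.int64 and dst_type == np.float16:
            x = x.astype(np.float32)
        return x.astype(dst_type)

    x = np.arange(24, dtype=np.int64).reshape((4, 3, 2))
    assert cast_with_fallback(x, np.float16).dtype == np.float16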
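
Note on the conv change: the three kernels now read the four-element
"pad_list" attribute (assumed ordering [pad_top, pad_bottom, pad_left,
pad_right]) instead of the old scalar "pad", and take the explicit-padding
path whenever the pads are asymmetric, since cuDNN's convolution descriptor
accepts only one symmetric pad per spatial dimension. A small Python sketch
of the symmetry check (the pad-mode strings stand in for
kSamePadModeUpperCase/kSamePadModeLowerCase):

    def needs_explicit_pad(pad_list, pad_mode):
        # pad_list = [top, bottom, left, right]; cuDNN expresses only
        # symmetric padding, so asymmetric pads use the manual-pad path.
        pad_height, pad_width = pad_list[0], pad_list[2]
        symmetry_pad = pad_height == pad_list[1] and pad_width == pad_list[3]
        return pad_mode in ("SAME", "same") or not symmetry_pad

    assert needs_explicit_pad([1, 2, 1, 1], "pad")      # asymmetric height
    assert not needs_explicit_pad([1, 1, 2, 2], "pad")  # symmetric pads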