forked from OSSInnovation/mindspore
!3240 GPU update CAST and conv2d_pad
Merge pull request !3240 from VectorSL/update
Commit bad04340d6

@@ -20,6 +20,8 @@ from _akg.topi.generic import schedule_elemwise
 def Cast(x, dst_type):
     """cast."""
+    if x.dtype == "int64" and dst_type == "float16":
+        x = cast.cast(x, "float32")
     return cast.cast(x, dst_type)
 
 
 
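The two added lines route an int64 -> float16 cast through an intermediate float32 cast, presumably because this akg path has no direct int64 -> float16 conversion. A minimal NumPy sketch of the equivalent behavior (cast_like_akg is a hypothetical stand-in for illustration, not part of the patch):

    import numpy as np

    def cast_like_akg(x, dst_type):
        # Mirrors the patched Cast: int64 -> float16 goes through float32 first.
        if x.dtype == np.int64 and dst_type == "float16":
            x = x.astype(np.float32)
        return x.astype(dst_type)

    x = np.arange(24, dtype=np.int64).reshape(4, 3, 2)
    assert cast_like_akg(x, "float16").dtype == np.float16
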
@@ -109,12 +109,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     Set4DDesc(in_shape, filter_shape, output_shape);
     group_ = GetAttr<int>(kernel_node, "group");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
-    pad_height_ = GetAttr<int>(kernel_node, "pad");
-    pad_width_ = pad_height_;
+    auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
+    pad_height_ = pad_list[0];
+    pad_width_ = pad_list[2];
+    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t input_descriptor_real = nullptr;
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
+    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
       SetPad(in_shape, kernel_node);
       input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
     } else {
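This hunk replaces the scalar "pad" attribute with the four-element "pad_list" and extends the explicit-padding path (SetPad) to any asymmetric padding, since cuDNN's 2D convolution descriptor takes only a single pad value per spatial dimension. The same change is repeated below for the filter-gradient and input-gradient kernels. A minimal sketch of the symmetry test, assuming the [top, bottom, left, right] layout implied by the indices above:

    def is_symmetric_pad(pad_list):
        # Assumed layout: [top, bottom, left, right].
        top, bottom, left, right = pad_list
        return top == bottom and left == right

    assert is_symmetric_pad([1, 1, 2, 2])      # cuDNN pads this directly
    assert not is_symmetric_pad([0, 1, 0, 1])  # falls back to explicit padding
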
@@ -113,12 +113,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     group_ = GetAttr<int>(kernel_node, "group");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
 
-    pad_height_ = GetAttr<int>(kernel_node, "pad");
-    pad_width_ = pad_height_;
+    auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
+    pad_height_ = pad_list[0];
+    pad_width_ = pad_list[2];
+    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t x_desc_real = nullptr;
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
+    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
       SetPad(in_shape, kernel_node);
       x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
     } else {

@@ -114,12 +114,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     group_ = GetAttr<int>(kernel_node, "group");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
 
-    pad_height_ = GetAttr<int>(kernel_node, "pad");
-    pad_width_ = pad_height_;
+    auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
+    pad_height_ = pad_list[0];
+    pad_width_ = pad_list[2];
+    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t dx_desc_real = nullptr;
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
+    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
       SetPad(input_shape, kernel_node);
       dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
     } else {

@@ -50,6 +50,9 @@ cast_op_info = AkgGpuRegOp("Cast") \
     .dtype_format(DataType.I16_Default, DataType.I32_Default) \
     .dtype_format(DataType.I16_Default, DataType.I64_Default) \
+    .dtype_format(DataType.I64_Default, DataType.F64_Default) \
+    .dtype_format(DataType.I64_Default, DataType.F32_Default) \
+    .dtype_format(DataType.I64_Default, DataType.F16_Default) \
     .dtype_format(DataType.I64_Default, DataType.I32_Default) \
     .dtype_format(DataType.I16_Default, DataType.F32_Default) \
     .dtype_format(DataType.I16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F32_Default, DataType.I32_Default) \
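With int64 source formats registered for the akg GPU Cast, the operator accepts int64 inputs on GPU. A hedged usage sketch (assumes a GPU build of MindSpore; the CastNet cell is illustrative, not from this diff):

    import numpy as np
    import mindspore.context as context
    from mindspore import Tensor
    from mindspore.common import dtype as mstype
    from mindspore.nn import Cell
    from mindspore.ops import operations as P

    class CastNet(Cell):
        def __init__(self, dst):
            super(CastNet, self).__init__()
            self.cast = P.Cast()
            self.dst = dst

        def construct(self, x):
            return self.cast(x, self.dst)

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    out = CastNet(mstype.float16)(Tensor(np.arange(6).astype(np.int64)))
    assert out.asnumpy().dtype == np.float16
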
@@ -92,7 +92,7 @@ def test_cast2():
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_cast3():
-    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16))
+    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
     t0 = mstype.int32
     x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32))
     t1 = mstype.int32

@@ -342,3 +342,20 @@ def test_cast17():
     assert type0 == 'float32'
     type1 = output[1].asnumpy().dtype
     assert type1 == 'float16'
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_cast18():
+    x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
+    t0 = mstype.float32
+    x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
+    t1 = mstype.float16
+
+    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
+    net = Net(t0, t1)
+    output = net(x0, x1)
+    type0 = output[0].asnumpy().dtype
+    assert type0 == 'float32'
+    type1 = output[1].asnumpy().dtype
+    assert type1 == 'float16'
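To run just the new case in isolation (test file path assumed from the usual MindSpore GPU ST layout, not stated in this diff):

    pytest -sv tests/st/ops/gpu/test_cast_op.py -k test_cast18
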