add int64-->fp16 and update conv pad

This commit is contained in:
VectorSL 2020-07-20 17:21:49 +08:00
parent 8e4c0a9d93
commit 90f15df037
6 changed files with 38 additions and 10 deletions

View File

@ -20,6 +20,8 @@ from _akg.topi.generic import schedule_elemwise
def Cast(x, dst_type):
    """Cast `x` to `dst_type`.

    An int64 input headed for float16 is first converted to float32 and
    only then cast to the requested type; every other combination is cast
    in a single step.
    """
    via_float32 = x.dtype == "int64" and dst_type == "float16"
    operand = cast.cast(x, "float32") if via_float32 else x
    return cast.cast(operand, dst_type)

View File

@ -109,12 +109,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
Set4DDesc(in_shape, filter_shape, output_shape);
group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_width_ = pad_height_;
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t input_descriptor_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(in_shape, kernel_node);
input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
} else {

View File

@ -113,12 +113,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_width_ = pad_height_;
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t x_desc_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(in_shape, kernel_node);
x_desc_real = use_pad_ ? padded_descriptor_ : x_desc_;
} else {

View File

@ -114,12 +114,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
group_ = GetAttr<int>(kernel_node, "group");
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed");
pad_height_ = GetAttr<int>(kernel_node, "pad");
pad_width_ = pad_height_;
auto pad_list = GetAttr<std::vector<int>>(kernel_node, "pad_list");
pad_height_ = pad_list[0];
pad_width_ = pad_list[2];
auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
SetStrideAndDilation(kernel_node);
cudnnTensorDescriptor_t dx_desc_real = nullptr;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
SetPad(input_shape, kernel_node);
dx_desc_real = use_pad_ ? padded_descriptor_ : dx_desc_;
} else {

View File

@ -50,6 +50,9 @@ cast_op_info = AkgGpuRegOp("Cast") \
.dtype_format(DataType.I16_Default, DataType.I32_Default) \
.dtype_format(DataType.I16_Default, DataType.I64_Default) \
.dtype_format(DataType.I64_Default, DataType.F64_Default) \
.dtype_format(DataType.I64_Default, DataType.F32_Default) \
.dtype_format(DataType.I64_Default, DataType.F16_Default) \
.dtype_format(DataType.I64_Default, DataType.I32_Default) \
.dtype_format(DataType.I16_Default, DataType.F32_Default) \
.dtype_format(DataType.I16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_Default, DataType.I32_Default) \

View File

@ -92,7 +92,7 @@ def test_cast2():
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_cast3():
x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16))
x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64))
t0 = mstype.int32
x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32))
t1 = mstype.int32
@ -342,3 +342,20 @@ def test_cast17():
assert type0 == 'float32'
type1 = output[1].asnumpy().dtype
assert type1 == 'float16'
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_cast18():
    """Run Net on two int64 inputs with float32 and float16 as the target
    types (GPU, graph mode) and check the dtype of each output."""
    shape = (4, 3, 2)
    int64_first = Tensor(np.arange(24).reshape(shape).astype(np.int64))
    target_first = mstype.float32
    int64_second = Tensor(np.arange(24).reshape(shape).astype(np.int64))
    target_second = mstype.float16

    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    net = Net(target_first, target_second)
    output = net(int64_first, int64_second)

    # Outputs are checked in order: index 0 first, then index 1.
    for index, expected_dtype in enumerate(('float32', 'float16')):
        assert output[index].asnumpy().dtype == expected_dtype