UnsortedSegmentSum (USS) op adaptation: extend the supported input dtypes (int8/16/64, uint8/16/32/64) on GPU/CPU and add dynamic-shape test coverage

This commit is contained in:
zengzitao 2022-08-09 17:50:33 +08:00
parent 8f714e5b45
commit be53775137
6 changed files with 283 additions and 7 deletions

View File

@ -118,24 +118,72 @@ std::vector<std::pair<KernelAttr, UnsortedSegmentSumGpuKernelMod::UnsortedSegmen
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeFloat32, kNumberTypeInt64, float, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeFloat16, kNumberTypeInt32, half, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeFloat16, kNumberTypeInt64, half, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt32, kNumberTypeInt32, int, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt32, kNumberTypeInt64, int, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt64, kNumberTypeInt32, int64_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt64, kNumberTypeInt64, int64_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt32, kNumberTypeInt32, int32_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt32, kNumberTypeInt64, int32_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt16, kNumberTypeInt32, int16_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt16, kNumberTypeInt64, int16_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt8, kNumberTypeInt32, int8_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeInt8, kNumberTypeInt64, int8_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt64, kNumberTypeInt32, uint64_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt64, kNumberTypeInt64, uint64_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt32, kNumberTypeInt32, uint32_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt32, kNumberTypeInt64, uint32_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt16, kNumberTypeInt32, uint16_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt16, kNumberTypeInt64, uint16_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt8, kNumberTypeInt32, uint8_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_REGISTER(kNumberTypeUInt8, kNumberTypeInt64, uint8_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat64, kNumberTypeInt32, kNumberTypeInt32, double, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat64, kNumberTypeInt64, kNumberTypeInt32, double, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat32, kNumberTypeInt32, kNumberTypeInt32, float, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat32, kNumberTypeInt64, kNumberTypeInt32, float, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat16, kNumberTypeInt32, kNumberTypeInt32, half, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat16, kNumberTypeInt64, kNumberTypeInt32, half, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt32, kNumberTypeInt32, int, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt64, kNumberTypeInt32, int, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat64, kNumberTypeInt32, kNumberTypeInt64, double, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat64, kNumberTypeInt64, kNumberTypeInt64, double, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat32, kNumberTypeInt32, kNumberTypeInt64, float, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat32, kNumberTypeInt64, kNumberTypeInt64, float, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat16, kNumberTypeInt32, kNumberTypeInt64, half, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeFloat16, kNumberTypeInt64, kNumberTypeInt64, half, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt8, kNumberTypeInt32, kNumberTypeInt32, int8_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt8, kNumberTypeInt64, kNumberTypeInt32, int8_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt8, kNumberTypeInt32, kNumberTypeInt64, int8_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt8, kNumberTypeInt64, kNumberTypeInt64, int8_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt16, kNumberTypeInt32, kNumberTypeInt32, int16_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt16, kNumberTypeInt64, kNumberTypeInt32, int16_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt16, kNumberTypeInt32, kNumberTypeInt64, int16_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt16, kNumberTypeInt64, kNumberTypeInt64, int16_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt32, kNumberTypeInt32, int, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt64, kNumberTypeInt32, int, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt32, kNumberTypeInt64, int, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt64, kNumberTypeInt64, int, int64_t)}};
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt32, kNumberTypeInt64, kNumberTypeInt64, int, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt64, kNumberTypeInt32, kNumberTypeInt32, int64_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt64, kNumberTypeInt64, kNumberTypeInt32, int64_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt64, kNumberTypeInt32, kNumberTypeInt64, int64_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeInt64, kNumberTypeInt64, kNumberTypeInt64, int64_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt8, kNumberTypeInt32, kNumberTypeInt32, uint8_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt8, kNumberTypeInt64, kNumberTypeInt32, uint8_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt8, kNumberTypeInt32, kNumberTypeInt64, uint8_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt8, kNumberTypeInt64, kNumberTypeInt64, uint8_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt16, kNumberTypeInt32, kNumberTypeInt32, uint16_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt16, kNumberTypeInt64, kNumberTypeInt32, uint16_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt16, kNumberTypeInt32, kNumberTypeInt64, uint16_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt16, kNumberTypeInt64, kNumberTypeInt64, uint16_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt32, kNumberTypeInt32, kNumberTypeInt32, uint32_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt32, kNumberTypeInt64, kNumberTypeInt32, uint32_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt32, kNumberTypeInt32, kNumberTypeInt64, uint32_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt32, kNumberTypeInt64, kNumberTypeInt64, uint32_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt64, kNumberTypeInt32, kNumberTypeInt32, uint64_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt64, kNumberTypeInt64, kNumberTypeInt32, uint64_t, int64_t)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt64, kNumberTypeInt32, kNumberTypeInt64, uint64_t, int)},
{UNSORTED_SEGMENT_SUM_GPU_DY_REGISTER(kNumberTypeUInt64, kNumberTypeInt64, kNumberTypeInt64, uint64_t, int64_t)},
};
std::vector<KernelAttr> UnsortedSegmentSumGpuKernelMod::GetOpSupport() {
std::vector<KernelAttr> support_list;

View File

@ -76,9 +76,56 @@ template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input
size_t output_dim1, half* input_addr, int64_t* ids_addr,
half* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int8_t* input_addr, int* ids_addr,
int8_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int8_t* input_addr, int64_t* ids_addr,
int8_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int16_t* input_addr, int* ids_addr,
int16_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int16_t* input_addr, int64_t* ids_addr,
int16_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int* input_addr, int* ids_addr, int* output_addr,
cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int* input_addr, int64_t* ids_addr,
int* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int64_t* input_addr, int* ids_addr,
int64_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, int64_t* input_addr, int64_t* ids_addr,
int64_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint8_t* input_addr, int* ids_addr,
uint8_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint8_t* input_addr, int64_t* ids_addr,
uint8_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint16_t* input_addr, int* ids_addr,
uint16_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint16_t* input_addr, int64_t* ids_addr,
uint16_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint32_t* input_addr, int* ids_addr,
uint32_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint32_t* input_addr, int64_t* ids_addr,
uint32_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint64_t* input_addr, int* ids_addr,
uint64_t* output_addr, cudaStream_t stream, const uint32_t &device_id);
template CUDA_LIB_EXPORT void UnsortedSegmentSum(size_t input_dim0, size_t input_dim1, size_t output_dim0,
size_t output_dim1, uint64_t* input_addr, int64_t* ids_addr,
uint64_t* output_addr, cudaStream_t stream, const uint32_t &device_id);

View File

@ -173,7 +173,8 @@ TypePtr UnsortedSegmentSumInferType(const PrimitivePtr &primitive, const std::ve
(void)CheckAndConvertUtils::CheckScalarOrTensorTypesSame(args_num_segments, num_type_set, prim_name);
/* check input_x */
auto x_type_ptr = input_args[kInputIndex0]->BuildType();
std::set<TypePtr> x_type_set = {kFloat16, kFloat32, kFloat64, kInt32, kInt64};
std::set<TypePtr> x_type_set = {kFloat16, kFloat32, kFloat64, kInt8, kInt16, kInt32,
kInt64, kUInt8, kUInt16, kUInt32, kUInt64};
return CheckAndConvertUtils::CheckTensorTypeValid("input_x", x_type_ptr, x_type_set, prim_name);
}
} // namespace

View File

@ -311,3 +311,29 @@ def test_vmap2(func):
output_shape = (2, 3, 5, 5)
expected = np.array(expected).reshape(output_shape)
np.testing.assert_allclose(output.asnumpy(), expected, rtol=1e-3)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('func', ['sum'])
@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE])
@pytest.mark.parametrize('data_type', [mstype.uint8, mstype.uint16, mstype.uint32, mstype.uint64, mstype.int8,
mstype.int16, mstype.int32, mstype.int64, mstype.float16,
mstype.float32, mstype.float64])
@pytest.mark.parametrize('index_type', [mstype.int32])
def test_unsorted_segment_arithmetic_dytpe(mode, func, data_type, index_type):
"""
Feature: UnsortedSegmentSum operators dtype test.
Description: test cases for UnsortedSegmentSum operator
Expectation: the result match numpy implementation.
"""
context.set_context(mode=mode, device_target='CPU')
x = Tensor(np.random.randint(0, 100, size=[2, 3, 4, 3, 2]), data_type)
segment_ids = Tensor(np.random.randint(0, 5, size=[2, 3]), index_type)
num_segments = 5
net = TestUnsortedSegmentArithmeticNet(func, num_segments)
graph_output = net(x, segment_ids)
expected = unsorted_segment_arith_expected(func, x, segment_ids, num_segments)
np.testing.assert_array_almost_equal(graph_output.asnumpy(), expected)

View File

@ -0,0 +1,130 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class UnsortedSegmentSumDynamicShapeNetMS(nn.Cell):
def __init__(self):
super().__init__()
self.uss = P.UnsortedSegmentSum()
def construct(self, x, segment_ids, num_segments):
return self.uss(x, segment_ids, num_segments)
class UnsortedSegmentSumDynamicShapeNetMSBeta(nn.Cell):
def __init__(self, numsegments):
super().__init__()
self.uss = P.UnsortedSegmentSum()
self.numsegments = numsegments
def construct(self, x, segment_ids):
return self.uss(x, segment_ids, self.numsegments)
def dyn_case():
x = np.arange(1, 10).reshape(3, 3).astype(np.float32)
input_x_dyn = Tensor(shape=[None, 3], dtype=mindspore.float32)
input_x = Tensor(x)
segment_ids = Tensor([0, 1, 0], mindspore.int32)
num_segments = Tensor([2,], mindspore.int32)
expect_np = np.array([[8, 10, 12], [4, 5, 6]], dtype=np.float32)
net = UnsortedSegmentSumDynamicShapeNetMS()
net.set_inputs(input_x_dyn, segment_ids, num_segments)
output = net(input_x, segment_ids, num_segments)
rtol = 1.e-4
atol = 1.e-4
assert np.allclose(output.asnumpy(), expect_np, rtol, atol, equal_nan=True)
def dyn_case_beta():
x = np.arange(1, 10).reshape(3, 3).astype(np.float32)
input_x_dyn = Tensor(shape=[3, None], dtype=mindspore.float32)
input_x = Tensor(x)
segment_ids = Tensor([0, 1, 0], mindspore.int32)
num_segments = 2
expect_np = np.array([[8, 10, 12], [4, 5, 6]], dtype=np.float32)
net = UnsortedSegmentSumDynamicShapeNetMSBeta(num_segments)
net.set_inputs(input_x_dyn, segment_ids)
output = net(input_x, segment_ids)
rtol = 1.e-4
atol = 1.e-4
assert np.allclose(output.asnumpy(), expect_np, rtol, atol, equal_nan=True)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_uss_dyn_cpu():
"""
Feature: test UnsortedSegmentSum dynamic shape on CPU, all inputs are tensor.
Description: inputs is dynamic shape.
Expectation: the result match with numpy result
"""
context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
dyn_case()
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
dyn_case()
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_uss_dyn_gpu():
"""
Feature: test UnsortedSegmentSum dynamic shape on GPU, all inputs are tensor.
Description: inputs is dynamic shape.
Expectation: the result match with numpy result
"""
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
dyn_case()
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
dyn_case()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_uss_dyn_cpu_beta():
"""
Feature: test UnsortedSegmentSum dynamic shape on CPU, num_segments is a var.
Description: inputs is dynamic shape.
Expectation: the result match with numpy result
"""
context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
dyn_case_beta()
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
dyn_case_beta()
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_uss_dyn_gpu_beta():
"""
Feature: test UnsortedSegmentSum dynamic shape on GPU, num_segments is a var.
Description: inputs is dynamic shape.
Expectation: the result match with numpy result
"""
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
dyn_case_beta()
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
dyn_case_beta()

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -117,6 +117,7 @@ class UnsortedSegmentSumDynNet(nn.Cell):
self.num_segments = num_segments
self.to_dyn_1 = dyn_a
self.to_dyn_2 = dyn_b
def construct(self, data, ids):
# testing selective inputs being dynamic
if self.to_dyn_1:
@ -310,3 +311,26 @@ def test_dyn_b():
[0., 0., 0.],
[0., 0., 0.]]]
assert (output.asnumpy() == expect).all()
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE])
@pytest.mark.parametrize('dtype', [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32,
np.int64, np.float16, np.float32, np.float64])
def test_uss_dtype(mode, dtype):
"""
Feature: test ops.UnsortedSegmentSum forward.
Description: inputs with different data type.
Expectation: the result match with expect
"""
context.set_context(mode=mode, device_target='GPU')
x = np.arange(1, 10).reshape(3, 3).astype(dtype)
input_x = Tensor(x)
segment_ids = Tensor([0, 1, 0], mstype.int32)
num_segments = 2
net = UnsortedSegmentSumNet(num_segments)
output = net(input_x, segment_ids)
expect_np = np.array([[8, 10, 12], [4, 5, 6]], dtype=dtype)
assert (output.asnumpy() == expect_np).all()