!35284 [MD] Adding docstrings for minddata UT Python Part 4

Merge pull request !35284 from davidanugraha/add_dataset_test_comment_part4
i-robot 2022-06-04 17:46:20 +00:00 committed by Gitee
commit e29229320e
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
32 changed files with 1808 additions and 291 deletions

View File

@@ -92,6 +92,7 @@
 "mindspore/tests/ut/python/dataset/test_batch.py" "broad-except"
 "mindspore/tests/ut/python/dataset/test_config.py" "broad-except"
 "mindspore/tests/ut/python/dataset/test_minddataset.py" "redefined-outer-name"
+"mindspore/tests/ut/python/dataset/test_minddataset.py" "unused-variable"
 "mindspore/tests/ut/python/dataset/test_minddataset_sampler.py" "redefined-outer-name"
 "mindspore/tests/ut/python/dataset/test_serdes_dataset.py" "redefined-outer-name"
 "mindspore/tests/ut/python/dataset/test_serdes_dataset.py" "unused-import"

View File

@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -223,6 +223,11 @@ def build_test_case_2maps(epochs, steps):
 def test_callbacks_all_methods():
+    """
+    Feature: Callback
+    Description: Test Map op with 1 callback and various combinations of num_epochs and num_steps args
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_all_methods")
     build_test_case_1cb(1, 1)
@@ -242,6 +247,11 @@ def test_callbacks_all_methods():
 def test_callbacks_var_step_size():
+    """
+    Feature: Callback
+    Description: Test Map op with 1 callback, step_size=2, and various combinations of num_epochs and num_steps args
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_var_step_size")
     build_test_case_1cb(1, 2, 2)
@@ -258,6 +268,11 @@ def test_callbacks_var_step_size():
 def test_callbacks_all_2cbs():
+    """
+    Feature: Callback
+    Description: Test Map op with 2 callbacks and various combinations of num_epochs and num_steps args
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_all_2cbs")
     build_test_case_2cbs(4, 1)
@@ -301,6 +316,11 @@ class Net(nn.Cell):
 def test_callbacks_non_sink():
+    """
+    Feature: Callback
+    Description: Test callbacks with dataset_sink_mode=False in train
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_non_sink")
     events = []
@@ -325,6 +345,11 @@ def test_callbacks_non_sink():
 def test_callbacks_non_sink_batch_size2():
+    """
+    Feature: Callback
+    Description: Test callbacks with dataset_sink_mode=False in train after batch(2) is applied to the dataset
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_non_sink_batch_size2")
     events = []
@@ -349,6 +374,11 @@ def test_callbacks_non_sink_batch_size2():
 def test_callbacks_non_sink_mismatch_size():
+    """
+    Feature: Callback
+    Description: Test callbacks with dataset_sink_mode=False in train with mismatched sizes
+    Expectation: Exception is raised as expected
+    """
     logger.info("test_callbacks_non_sink_mismatch_size")
     default_timeout = ds.config.get_callback_timeout()
     ds.config.set_callback_timeout(1)
@@ -370,6 +400,11 @@ def test_callbacks_non_sink_mismatch_size():
 def test_callbacks_validations():
+    """
+    Feature: Callback
+    Description: Test the callbacks param in Map op with invalid arguments
+    Expectation: Exception is raised as expected
+    """
     logger.info("test_callbacks_validations")
     with pytest.raises(Exception) as err:
@@ -397,6 +432,11 @@ def test_callbacks_validations():
 def test_callbacks_sink_simulation():
+    """
+    Feature: Callback
+    Description: Test callbacks under sink simulation
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callback_sink_simulation")
     events = []
@@ -424,6 +464,11 @@ def test_callbacks_sink_simulation():
 def test_callbacks_repeat():
+    """
+    Feature: Callback
+    Description: Test Map op with 1 callback and various combinations of num_epochs, num_steps, step_size, and repeat args
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_repeat")
     build_test_case_1cb(epochs=2, steps=2, step_size=1, repeat=2)
@@ -453,6 +498,11 @@ def test_callbacks_exceptions():
 def test_callbacks_train_end():
+    """
+    Feature: Callback
+    Description: Test the callback end method under sink simulation
+    Expectation: Runs successfully
+    """
     logger.info("test_callback_sink_simulation")
     # No asserts are needed, just test there is no deadlock or exceptions
     events = []
@@ -469,6 +519,11 @@ def test_callbacks_train_end():
 def test_callbacks_one_cb():
+    """
+    Feature: Callback
+    Description: Test callbacks with Begin, EpochBegin, EpochEnd, StepBegin, and StepEnd as the args
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_callbacks_one_cb")
     data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
@@ -510,6 +565,11 @@ def test_callbacks_one_cb():
 def test_clear_callback():
+    """
+    Feature: Callback
+    Description: Test that the callback is removed for get_dataset_size and output_shape/type
+    Expectation: Output is equal to the expected output
+    """
     logger.info("test_clear_callback")
     # this test case will test that callback is removed for get_dataset_size and output_shape/type
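A note for reviewers unfamiliar with the feature under test: a dataset callback is attached to a Map op and fires on pipeline, epoch, and step boundaries. The following is a minimal sketch, not code from this PR; the DSCallback hook names match the public API, while the run-context attribute names and the toy data are assumptions:

import mindspore.dataset as ds
from mindspore.dataset.callback import DSCallback

class PrintCallback(DSCallback):
    """Record pipeline events; the tests above collect such events and compare them."""
    def ds_begin(self, ds_run_context):
        print("pipeline started")
    def ds_epoch_begin(self, ds_run_context):
        print("epoch", ds_run_context.cur_epoch_num)  # attribute name assumed
    def ds_step_begin(self, ds_run_context):
        print("step", ds_run_context.cur_step_num)  # attribute name assumed

data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False)
# step_size controls how often ds_step_begin fires, as exercised by build_test_case_1cb
data = data.map(operations=(lambda x: x), callbacks=PrintCallback(step_size=1))
for _ in data.create_tuple_iterator(num_epochs=2):
    pass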

View File

@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,6 +24,11 @@ import mindspore.dataset.transforms as data_trans
 def test_concatenate_op_all():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op with all input parameters provided
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array([5., 6., 7., 8.], dtype=np.float),)
@@ -39,6 +44,11 @@ def test_concatenate_op_all():
 def test_concatenate_op_none():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op with none of the input parameters provided
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array([5., 6., 7., 8.], dtype=np.float),)
@@ -51,6 +61,11 @@ def test_concatenate_op_none():
 def test_concatenate_op_string():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op on an array of strings
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array(["ss", "ad"], dtype='S'),)
@@ -66,6 +81,11 @@ def test_concatenate_op_string():
 def test_concatenate_op_multi_input_string():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op on a multidimensional array of strings
+    Expectation: Output is equal to the expected output
+    """
     prepend_tensor = np.array(["dw", "df"], dtype='S')
     append_tensor = np.array(["dwsdf", "df"], dtype='S')
@@ -82,6 +102,11 @@ def test_concatenate_op_multi_input_string():
 def test_concatenate_op_multi_input_numeric():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op on a multidimensional array of ints
+    Expectation: Output is equal to the expected output
+    """
     prepend_tensor = np.array([3, 5])
     data = ([[1, 2]], [[3, 4]])
@@ -97,6 +122,12 @@ def test_concatenate_op_multi_input_numeric():
 def test_concatenate_op_type_mismatch():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op where the data type of the original dataset array (float)
+        does not match the data type of the tensor to be concatenated (string)
+    Expectation: Error is raised as expected
+    """
     def gen():
         yield (np.array([3, 4], dtype=np.float),)
@@ -112,6 +143,12 @@ def test_concatenate_op_type_mismatch():
 def test_concatenate_op_type_mismatch2():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op where the data type of the original dataset array (string)
+        does not match the data type of the tensor to be concatenated (float)
+    Expectation: Error is raised as expected
+    """
     def gen():
         yield (np.array(["ss", "ad"], dtype='S'),)
@@ -127,6 +164,11 @@ def test_concatenate_op_type_mismatch2():
 def test_concatenate_op_incorrect_dim():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op on an original dataset with incorrect dimensions
+    Expectation: Error is raised as expected
+    """
     def gen():
         yield (np.array([["ss", "ad"], ["ss", "ad"]], dtype='S'),)
@@ -142,12 +184,22 @@ def test_concatenate_op_incorrect_dim():
 def test_concatenate_op_wrong_axis():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op with an invalid axis argument
+    Expectation: Error is raised as expected
+    """
     with pytest.raises(ValueError) as error_info:
         data_trans.Concatenate(2)
     assert "only 1D concatenation supported." in str(error_info.value)

 def test_concatenate_op_negative_axis():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op with a negative axis argument
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array([5., 6., 7., 8.], dtype=np.float),)
@@ -163,6 +215,11 @@ def test_concatenate_op_negative_axis():
 def test_concatenate_op_incorrect_input_dim():
+    """
+    Feature: Concatenate op
+    Description: Test Concatenate op with a prepend array that has incorrect dimensions
+    Expectation: Error is raised as expected
+    """
     prepend_tensor = np.array([["ss", "ad"], ["ss", "ad"]], dtype='S')
     with pytest.raises(ValueError) as error_info:
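For context on the cases above: Concatenate prepends and/or appends tensors to each input row, and only 1D concatenation is supported (axis 0 or -1). A minimal sketch of the happy path, with illustrative values mirroring test_concatenate_op_all:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms as data_trans

def gen():
    yield (np.array([5., 6., 7., 8.], dtype=np.float32),)

prepend_tensor = np.array([1.4, 2., 3.], dtype=np.float32)
append_tensor = np.array([9.], dtype=np.float32)
# prepend/append must be 1D and match the input's data type, or an error is raised
concat_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)
data = ds.GeneratorDataset(gen, column_names=["col"]).map(operations=concat_op, input_columns=["col"])
for row in data.create_tuple_iterator(output_numpy=True):
    print(row[0])  # [1.4 2.  3.  5.  6.  7.  8.  9. ]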

View File

@@ -32,8 +32,8 @@ def count_unequal_element(data_expected, data_me, rtol, atol):
 def test_create_dct_none():
     """
-    Feature: CreateDct
-    Description: Test CreateDct in eager mode
+    Feature: Create DCT transformation
+    Description: Test create_dct in eager mode with no normalization
     Expectation: The returned result is as expected
     """
     expect = np.array([[2.00000000, 1.84775901],
@@ -46,8 +46,8 @@ def test_create_dct_none():
 def test_create_dct_ortho():
     """
-    Feature: CreateDct
-    Description: Test CreateDct in eager mode
+    Feature: Create DCT transformation
+    Description: Test create_dct in eager mode with orthogonal normalization
     Expectation: The returned result is as expected
     """
     output = create_dct(1, 3, NormMode.ORTHO)
@@ -59,9 +59,9 @@ def test_create_dct_ortho():
 def test_createdct_invalid_input():
     """
-    Feature: CreateDct
-    Description: Error detection
-    Expectation: Return error
+    Feature: Create DCT transformation
+    Description: Test create_dct with invalid inputs
+    Expectation: Error is raised as expected
     """
     def test_invalid_input(test_name, n_mfcc, n_mels, norm, error, error_msg):
         logger.info("Test CreateDct with bad input: {0}".format(test_name))

View File

@@ -1,4 +1,4 @@
-# Copyright 2020-2021 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,7 +38,9 @@ def diff_mse(in1, in2):
 def test_cifar10():
     """
-    dataset parameter
+    Feature: Epoch Control op
+    Description: Test num_epochs as a tuple iterator param for Cifar10Dataset
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test dataset parameter")
     data_dir_10 = "../data/dataset/testCifar10Data"
@@ -70,7 +72,9 @@ def test_cifar10():
 def test_decode_op():
     """
-    Test Decode op
+    Feature: Epoch Control op
+    Description: Test num_epochs as a dict iterator param for a dataset to which the Decode op has been applied
+    Expectation: Output is equal to the expected output until the iterator is stopped, then the correct error is raised
     """
     logger.info("test_decode_op")
@@ -125,7 +129,9 @@ def generator_1d():
 def test_generator_dict_0():
     """
-    test generator dict 0
+    Feature: Epoch Control op
+    Description: Test dict iterator declared inside the loop for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -142,7 +148,9 @@ def test_generator_dict_0():
 def test_generator_dict_1():
     """
-    test generator dict 1
+    Feature: Epoch Control op
+    Description: Test dict iterator declared outside the epoch for loop for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -162,7 +170,9 @@ def test_generator_dict_1():
 def test_generator_dict_2():
     """
-    test generator dict 2
+    Feature: Epoch Control op
+    Description: Test dict iterator with num_epochs=-1 for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output and the iterator never shuts down
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -187,7 +197,9 @@ def test_generator_dict_2():
 def test_generator_dict_3():
     """
-    test generator dict 3
+    Feature: Epoch Control op
+    Description: Test dict iterator with num_epochs=-1 followed by stop for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output before stop, then an error is raised
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -213,7 +225,10 @@ def test_generator_dict_3():
 def test_generator_dict_4():
     """
-    test generator dict 4
+    Feature: Epoch Control op
+    Description: Test dict iterator by fetching data beyond the specified number of epochs for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -236,7 +251,11 @@ def test_generator_dict_4():
 def test_generator_dict_4_1():
     """
-    test generator dict 4_1
+    Feature: Epoch Control op
+    Description: Test dict iterator by fetching data beyond the specified number of epochs, where num_epochs=1 so
+        the Epoch Control op will not be injected, using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -260,7 +279,11 @@ def test_generator_dict_4_1():
 def test_generator_dict_4_2():
     """
-    test generator dict 4_2
+    Feature: Epoch Control op
+    Description: Test dict iterator by fetching data beyond the specified number of epochs, where num_epochs=1 so
+        the Epoch Control op will not be injected, after a Repeat op with num_repeat=1, using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -286,7 +309,11 @@ def test_generator_dict_4_2():
 def test_generator_dict_5():
     """
-    test generator dict 5
+    Feature: Epoch Control op
+    Description: Test dict iterator by fetching data below (2 loops) and then
+        beyond the specified number of epochs using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -320,7 +347,9 @@ def test_generator_dict_5():
 def test_generator_tuple_0():
     """
-    test generator tuple 0
+    Feature: Epoch Control op
+    Description: Test tuple iterator declared inside the loop for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -337,7 +366,9 @@ def test_generator_tuple_0():
 def test_generator_tuple_1():
     """
-    test generator tuple 1
+    Feature: Epoch Control op
+    Description: Test tuple iterator declared outside the epoch for loop for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -357,7 +388,9 @@ def test_generator_tuple_1():
 def test_generator_tuple_2():
     """
-    test generator tuple 2
+    Feature: Epoch Control op
+    Description: Test tuple iterator with num_epochs=-1 for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output and the iterator never shuts down
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -381,7 +414,9 @@ def test_generator_tuple_2():
 def test_generator_tuple_3():
     """
-    test generator tuple 3
+    Feature: Epoch Control op
+    Description: Test tuple iterator with num_epochs=-1 followed by stop for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output before stop, then an error is raised
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -406,7 +441,10 @@ def test_generator_tuple_3():
 def test_generator_tuple_4():
     """
-    test generator tuple 4
+    Feature: Epoch Control op
+    Description: Test tuple iterator by fetching data beyond the specified num_epochs for 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -429,7 +467,11 @@ def test_generator_tuple_4():
 def test_generator_tuple_5():
     """
-    test generator tuple 5
+    Feature: Epoch Control op
+    Description: Test tuple iterator by fetching data below (2 loops) and then
+        beyond the specified number of epochs using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -462,7 +504,11 @@ def test_generator_tuple_5():
 # Test with repeat
 def test_generator_tuple_repeat_1():
     """
-    test generator tuple repeat 1
+    Feature: Epoch Control op
+    Description: Test tuple iterator by applying the Repeat op first, then fetching data below (2 loops) and
+        beyond the specified number of epochs using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -496,7 +542,11 @@ def test_generator_tuple_repeat_1():
 # Test with repeat
 def test_generator_tuple_repeat_repeat_1():
     """
-    test generator tuple repeat repeat 1
+    Feature: Epoch Control op
+    Description: Test tuple iterator by applying the Repeat op twice first, then fetching data below (2 loops) and
+        beyond the specified number of epochs using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output while fetching within the specified num_epochs,
+        then an error is raised once an EOF buffer is encountered
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -530,7 +580,10 @@ def test_generator_tuple_repeat_repeat_1():
 def test_generator_tuple_repeat_repeat_2():
     """
-    test generator tuple repeat repeat 2
+    Feature: Epoch Control op
+    Description: Test tuple iterator with num_epochs=-1 by applying the Repeat op twice first and then
+        calling stop on the iterator, using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output before stop is called, then an error is raised
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -557,7 +610,10 @@ def test_generator_tuple_repeat_repeat_2():
 def test_generator_tuple_repeat_repeat_3():
     """
-    test generator tuple repeat repeat 3
+    Feature: Epoch Control op
+    Description: Test tuple iterator by applying the Repeat op twice first, then doing 2 loops whose
+        combined iteration count equals the specified num_epochs, using 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -587,7 +643,10 @@ def test_generator_tuple_repeat_repeat_3():
 def test_generator_tuple_infinite_repeat_repeat_1():
     """
-    test generator tuple infinite repeat repeat 1
+    Feature: Epoch Control op
+    Description: Test tuple iterator by applying an infinite Repeat and then a Repeat with a specified num_repeat,
+        then iterating over 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -610,7 +669,10 @@ def test_generator_tuple_infinite_repeat_repeat_1():
 def test_generator_tuple_infinite_repeat_repeat_2():
     """
-    test generator tuple infinite repeat repeat 2
+    Feature: Epoch Control op
+    Description: Test tuple iterator by applying a Repeat with a specified num_repeat and then an infinite Repeat,
+        then iterating over 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -633,7 +695,10 @@ def test_generator_tuple_infinite_repeat_repeat_2():
 def test_generator_tuple_infinite_repeat_repeat_3():
     """
-    test generator tuple infinite repeat repeat 3
+    Feature: Epoch Control op
+    Description: Test tuple iterator by applying an infinite Repeat twice,
+        then iterating over 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -656,7 +721,10 @@ def test_generator_tuple_infinite_repeat_repeat_3():
 def test_generator_tuple_infinite_repeat_repeat_4():
     """
-    test generator tuple infinite repeat repeat 4
+    Feature: Epoch Control op
+    Description: Test tuple iterator with num_epochs=1 by applying an infinite Repeat twice,
+        then iterating over 1D GeneratorDataset 0-63
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")
@@ -679,7 +747,10 @@ def test_generator_tuple_infinite_repeat_repeat_4():
 def test_generator_reusedataset():
     """
-    test generator reusedataset
+    Feature: Epoch Control op
+    Description: Test an iterator and other ops (Repeat/Batch) on 1D GeneratorDataset 0-63 that has
+        previously had an iterator and other ops (Repeat/Batch) applied to it
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1D Generator : 0 - 63")

View File

@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,6 +22,11 @@ import mindspore.dataset.transforms as data_trans
 def test_fillop_basic():
+    """
+    Feature: Fill op
+    Description: Test Fill op basic usage (a positive int onto an array of uint8)
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array([4, 5, 6, 7], dtype=np.uint8),)
@@ -35,6 +40,11 @@ def test_fillop_basic():
 def test_fillop_down_type_cast():
+    """
+    Feature: Fill op
+    Description: Test Fill op with a negative number onto an array of uint8
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array([4, 5, 6, 7], dtype=np.uint8),)
@@ -48,6 +58,11 @@ def test_fillop_down_type_cast():
 def test_fillop_up_type_cast():
+    """
+    Feature: Fill op
+    Description: Test Fill op with an int onto an array of floats
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array([4, 5, 6, 7], dtype=np.float),)
@@ -61,6 +76,11 @@ def test_fillop_up_type_cast():
 def test_fillop_string():
+    """
+    Feature: Fill op
+    Description: Test Fill op with a string onto an array of strings
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array(["45555", "45555"], dtype='S'),)
@@ -74,6 +94,11 @@ def test_fillop_string():
 def test_fillop_bytes():
+    """
+    Feature: Fill op
+    Description: Test Fill op with bytes onto an array of strings
+    Expectation: Output is equal to the expected output
+    """
     def gen():
         yield (np.array(["A", "B", "C"], dtype='S'),)
@@ -87,6 +112,11 @@ def test_fillop_bytes():
 def test_fillop_error_handling():
+    """
+    Feature: Fill op
+    Description: Test Fill op with a mismatched data type (a string onto an array of ints)
+    Expectation: Error is raised as expected
+    """
     def gen():
         yield (np.array([4, 4, 4, 4]),)
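Fill replaces every element of each input tensor with a single value cast to the input's type, which is why down-casting (a negative value into uint8) and up-casting (an int into float) each get a dedicated case. A small sketch with illustrative data:

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms as data_trans

def gen():
    yield (np.array([4, 5, 6, 7], dtype=np.uint8),)

# Every element becomes 2; the output keeps the input's uint8 type
data = ds.GeneratorDataset(gen, column_names=["col"]).map(operations=data_trans.Fill(2), input_columns=["col"])
for row in data.create_dict_iterator(output_numpy=True):
    print(row["col"])  # [2 2 2 2]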

View File

@@ -1,4 +1,4 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
+# Copyright 2019-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,8 +22,12 @@ DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
 SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"

-# test for predicate
 def test_diff_predicate_func():
+    """
+    Feature: Filter op
+    Description: Test Filter op using a predicate function as an arg
+    Expectation: Output is equal to the expected output
+    """
     def test_filter(predicate_func):
         transforms = [
             cde.Decode(),
@@ -58,8 +62,12 @@ def generator_1d():
     yield (np.array(i),)

-# test with GeneratorDataset
 def test_filter_by_generator_with_no():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4)
     num_iter = 0
@@ -69,8 +77,12 @@ def test_filter_by_generator_with_no():
     num_iter += 1

-# test with repeatOp before
 def test_filter_by_generator_with_repeat():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Repeat op before it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_r = dataset.repeat(4)
     dataset_f = dataset_r.filter(predicate=filter_func_ge, num_parallel_workers=4)
@@ -87,8 +99,12 @@ def test_filter_by_generator_with_repeat():
     assert ret_data[index] == expected_rs[ii]

-# test with repeatOp after
 def test_filter_by_generator_with_repeat_after():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Repeat op after it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_f = dataset.filter(predicate=filter_func_ge, num_parallel_workers=4)
     dataset_r = dataset_f.repeat(4)
@@ -113,8 +129,12 @@ def filter_func_batch_after(data):
     return data <= 20

-# test with batchOp before
 def test_filter_by_generator_with_batch():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Batch op before it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_b = dataset.batch(4)
     dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4)
@@ -129,8 +149,12 @@ def test_filter_by_generator_with_batch():
     assert ret_data[2][0] == 8

-# test with batchOp after
 def test_filter_by_generator_with_batch_after():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Batch op after it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_f = dataset.filter(predicate=filter_func_batch_after, num_parallel_workers=4)
     dataset_b = dataset_f.batch(4)
@@ -149,8 +173,12 @@ def filter_func_shuffle(data):
     return data <= 20

-# test with batchOp before
 def test_filter_by_generator_with_shuffle():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Shuffle op before it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_s = dataset.shuffle(4)
     dataset_f = dataset_s.filter(predicate=filter_func_shuffle, num_parallel_workers=4)
@@ -164,8 +192,12 @@ def filter_func_shuffle_after(data):
     return data <= 20

-# test with batchOp after
 def test_filter_by_generator_with_shuffle_after():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Shuffle op after it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_f = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4)
     dataset_s = dataset_f.shuffle(4)
@@ -194,8 +226,12 @@ def filter_func_zip_after(data1):
     return data1 <= 20

-# test with zipOp before
 def test_filter_by_generator_with_zip():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Zip op before it
+    Expectation: Output is equal to the expected output
+    """
     dataset1 = ds.GeneratorDataset(generator_1d_zip1, ["data1"])
     dataset2 = ds.GeneratorDataset(generator_1d_zip2, ["data2"])
     dataz = ds.zip((dataset1, dataset2))
@@ -212,8 +248,12 @@ def test_filter_by_generator_with_zip():
     assert ret_data[5]["data2"] == 105

-# test with zipOp after
 def test_filter_by_generator_with_zip_after():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Zip op after it
+    Expectation: Output is equal to the expected output
+    """
     dataset1 = ds.GeneratorDataset(generator_1d_zip1, ["data1"])
     dataset2 = ds.GeneratorDataset(generator_1d_zip1, ["data2"])
     dt1 = dataset1.filter(predicate=filter_func_zip_after, num_parallel_workers=4)
@@ -258,8 +298,12 @@ def func_map_part(data_col1):
     return data_col1

-# test with map
 def test_filter_by_generator_with_map_all_col():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Map op before it; Filter op is applied to all input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
     dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"])
     # dataset_map = dataset.map(operations=func_map_part)
@@ -274,8 +318,13 @@ def test_filter_by_generator_with_map_all_col():
     assert ret_data[1] == 1

-# test with map
 def test_filter_by_generator_with_map_part_col():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Map op before it;
+        Filter op is applied to only part of the input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"])
     dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
@@ -294,8 +343,12 @@ def filter_func_rename(data):
     return data > 8

-# test with rename before
 def test_filter_by_generator_with_rename():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Rename op before it
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset_b = dataset.rename(input_columns=["data"], output_columns=["col1"])
     dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4)
@@ -309,7 +362,6 @@ def test_filter_by_generator_with_rename():
     assert ret_data[54] == 63

-# test input_column
 def filter_func_input_column1(col1, col2):
     _ = col2
     return col1[0] < 8
@@ -324,8 +376,12 @@ def filter_func_input_column3(col1):
     return True

-# test with input_columns
 def test_filter_by_generator_with_input_column():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"])
     dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"])
     dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1,
@@ -343,7 +399,6 @@ def test_filter_by_generator_with_input_column():
     assert ret_data[7] == 7

-# test kFilterPartial
 def generator_mc_p0(maxid=20):
     for i in range(maxid):
         yield (np.array([i]), np.array([i + 100]))
@@ -362,8 +417,13 @@ def filter_func_Partial_0(col1, col2, col3, col4):
     return True

-# test with row_data_buffer > 1
 def test_filter_by_generator_Partial0():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Zip op before it;
+        Filter op is applied to only part of the input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset1 = ds.GeneratorDataset(source=generator_mc_p0(), column_names=["col1", "col2"])
     dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"])
     dataset_zip = ds.zip((dataset1, dataset2))
@@ -375,8 +435,13 @@ def test_filter_by_generator_Partial0():
     assert ret[6] == 12

-# test with row_data_buffer > 1
 def test_filter_by_generator_Partial1():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Zip op before it and a Map op after it;
+        Filter op is applied to only part of the input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset1 = ds.GeneratorDataset(source=generator_mc_p0(), column_names=["col1", "col2"])
     dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"])
     dataset_zip = ds.zip((dataset1, dataset2))
@@ -389,8 +454,13 @@ def test_filter_by_generator_Partial1():
     assert ret[6] == 412

-# test with row_data_buffer > 1
 def test_filter_by_generator_Partial2():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Zip op after it and a Map op after the Zip op;
+        Filter op is applied to only part of the input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset1 = ds.GeneratorDataset(source=generator_mc_p0(), column_names=["col1", "col2"])
     dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"])
@@ -421,8 +491,13 @@ def generator_big(maxid=20):
     yield (np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]))

-# test with row_data_buffer > 1
 def test_filter_by_generator_Partial():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Shuffle op before it;
+        Filter op is applied to only part of the input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(source=(lambda: generator_mc(99)), column_names=["col1", "col2"])
     dataset_s = dataset.shuffle(4)
     dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1)
@@ -436,8 +511,12 @@ def filter_func_cifar(col1, col2):
     return col2 % 3 == 0

-# test with cifar10
 def test_filte_case_dataset_cifar10():
+    """
+    Feature: Filter op
+    Description: Test Filter op using Cifar10Dataset
+    Expectation: Output is equal to the expected output
+    """
     DATA_DIR_10 = "../data/dataset/testCifar10Data"
     dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False)
     dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1)
@@ -446,8 +525,6 @@ def test_filte_case_dataset_cifar10():
     assert item["label"] % 3 == 0

-# column id sort
 def generator_sort1(maxid=20):
     for i in range(maxid):
         yield (np.array([i]), np.array([i + 100]), np.array([i + 200]))
@@ -468,6 +545,11 @@ def filter_func_map_sort(col1, col2, col3):
 def test_filter_by_generator_with_map_all_sort():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with a Zip op before it; Filter op is applied to all input columns
+    Expectation: Output is equal to the expected output
+    """
     dataset1 = ds.GeneratorDataset(generator_sort1(10), ["col1", "col2", "col3"])
     dataset2 = ds.GeneratorDataset(generator_sort2(10), ["col4 ", "col5", "col6"])
@@ -485,6 +567,11 @@ def test_filter_by_generator_with_map_all_sort():
 def test_filter_by_generator_get_dataset_size():
+    """
+    Feature: Filter op
+    Description: Test Filter op using GeneratorDataset with get_dataset_size called afterwards
+    Expectation: Output is equal to the expected output
+    """
     dataset = ds.GeneratorDataset(generator_1d, ["data"])
     dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4)
     data_sie = dataset.get_dataset_size()
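All of these cases revolve around Dataset.filter, which drops any row for which the predicate returns False; input_columns restricts which columns the predicate receives. A compact sketch of the core behavior, mirroring the data < 11 case above:

import numpy as np
import mindspore.dataset as ds

def generator_1d():
    for i in range(64):
        yield (np.array(i),)

data = ds.GeneratorDataset(generator_1d, ["data"])
# Rows 0..10 pass the predicate; the remaining 53 rows are dropped
data = data.filter(predicate=lambda data: data < 11, num_parallel_workers=4)
num_rows = sum(1 for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True))
assert num_rows == 11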

View File

@ -1,4 +1,4 @@
# Copyright 2019 Huawei Technologies Co., Ltd # Copyright 2019-2022 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -164,7 +164,11 @@ def add_and_remove_nlp_compress_file():
def test_nlp_compress_data(add_and_remove_nlp_compress_file): def test_nlp_compress_data(add_and_remove_nlp_compress_file):
"""tutorial for nlp minderdataset.""" """
Feature: MindDataset
Description: Test compressing NLP MindDataset
Expectation: Output is equal to the expected output
"""
data = [] data = []
for row_id in range(16): for row_id in range(16):
data.append({ data.append({
@ -196,7 +200,11 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
def test_cv_minddataset_writer_tutorial(): def test_cv_minddataset_writer_tutorial():
"""tutorial for cv dataset writer.""" """
Feature: MindDataset
Description: Test MindDataset FileWriter basic usage
Expectation: Runs successfully
"""
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)] for x in range(FILES_NUM)]
@ -226,7 +234,11 @@ def test_cv_minddataset_writer_tutorial():
def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards and shard_id) on MindDataset basic usage
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -249,7 +261,11 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards and shard_id) on MindDataset with num_samples=1
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -275,7 +291,12 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards and shard_id) on MindDataset
with num_samples > 1 but num_samples <= dataset size
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -301,7 +322,12 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards and shard_id) on MindDataset
with num_samples > 1 but num_samples > dataset size
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -325,8 +351,14 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
assert partitions(5, 2) == 2 assert partitions(5, 2) == 2
assert partitions(9, 2) == 2 assert partitions(9, 2) == 2
def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards=1 and shard_id) on MindDataset
with num_samples > 1 and num_samples = dataset size
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -342,8 +374,14 @@ def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file):
assert num_iter == 5 assert num_iter == 5
def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_cv_file): def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards=1 and shard_id) on MindDataset
and check that the result is not shuffled
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
num_shards = 3 num_shards = 3
@ -383,7 +421,12 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c
def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoch(add_and_remove_cv_file): def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoch(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test partition (using num_shards=1 and shard_id) on MindDataset
and check that the whole result under multiple epochs is not shuffled
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -419,7 +462,11 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test read on MindDataset after Repeat op is applied and check that the result is not shuffled
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -514,7 +561,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
def test_cv_minddataset_dataset_size(add_and_remove_cv_file): def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test get_dataset_size on MindDataset
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -538,7 +589,11 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test read on MindDataset where after multiple Map ops and repeat op the result is not shuffled
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "label"] columns_list = ["data", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -570,7 +625,11 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
"""tutorial for cv minddataset.""" """
Feature: MindDataset
Description: Test MindDataset when batch_size in Batch op is larger than records
Expectation: Output is equal to the expected output
"""
columns_list = ["data", "label"] columns_list = ["data", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -595,7 +654,11 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
def test_cv_minddataset_issue_888(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test MindDataset by applying Shuffle op followed by Repeat op (regression test for issue 888)
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "label"]
    num_readers = 2
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -609,7 +672,11 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file):
def test_cv_minddataset_reader_file_list(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset using a list of files
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -633,7 +700,11 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file):
def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset using a list containing one file
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -656,19 +727,23 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset using 2 datasets that are written with FileWriter
    Expectation: Output is equal to the expected output
    """
    cv1_file_name = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord"
    cv2_file_name = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord"
    try:
        if os.path.exists(cv1_file_name):
            os.remove(cv1_file_name)
        if os.path.exists("{}.db".format(cv1_file_name)):
            os.remove("{}.db".format(cv1_file_name))
        if os.path.exists(cv2_file_name):
            os.remove(cv2_file_name)
        if os.path.exists("{}.db".format(cv2_file_name)):
            os.remove("{}.db".format(cv2_file_name))
        writer = FileWriter(cv1_file_name, 1)
        data = get_data(CV_DIR_NAME)
        cv_schema_json = {"id": {"type": "int32"},
                          "file_name": {"type": "string"},
@ -679,7 +754,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
        writer.write_raw_data(data)
        writer.commit()
        writer = FileWriter(cv2_file_name, 1)
        data = get_data(CV_DIR_NAME)
        cv_schema_json = {"id": {"type": "int32"},
                          "file_name": {"type": "string"},
@ -692,7 +767,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"] columns_list = ["data", "file_name", "label"]
num_readers = 4 num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME], data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [cv1_file_name, cv2_file_name],
columns_list, num_readers) columns_list, num_readers)
assert data_set.get_dataset_size() == 30 assert data_set.get_dataset_size() == 30
num_iter = 0 num_iter = 0
@ -710,29 +785,34 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
            num_iter += 1
        assert num_iter == 30
    except Exception as error:
        if os.path.exists(cv1_file_name):
            os.remove(cv1_file_name)
        if os.path.exists("{}.db".format(cv1_file_name)):
            os.remove("{}.db".format(cv1_file_name))
        if os.path.exists(cv2_file_name):
            os.remove(cv2_file_name)
        if os.path.exists("{}.db".format(cv2_file_name)):
            os.remove("{}.db".format(cv2_file_name))
        raise error
    else:
        if os.path.exists(cv1_file_name):
            os.remove(cv1_file_name)
        if os.path.exists("{}.db".format(cv1_file_name)):
            os.remove("{}.db".format(cv1_file_name))
        if os.path.exists(cv2_file_name):
            os.remove(cv2_file_name)
        if os.path.exists("{}.db".format(cv2_file_name)):
            os.remove("{}.db".format(cv2_file_name))
def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset using two datasets that are partitioned into two lists
    Expectation: Output is equal to the expected output
    """
    cv1_file_name = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1"
    paths = ["{}{}".format(cv1_file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    try:
        for x in paths:
@ -740,7 +820,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
os.remove("{}".format(x)) os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)): if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x)) os.remove("{}.db".format(x))
writer = FileWriter(CV1_FILE_NAME, FILES_NUM) writer = FileWriter(cv1_file_name, FILES_NUM)
data = get_data(CV_DIR_NAME) data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"}, cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"}, "file_name": {"type": "string"},
@ -755,7 +835,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
        num_readers = 4
        file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
        data_set = ds.MindDataset([file_name + str(x) for x in range(2)] +
                                  [cv1_file_name + str(x) for x in range(2, 4)],
                                  columns_list, num_readers)
        assert data_set.get_dataset_size() < 20
        num_iter = 0
@ -784,7 +864,11 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset (tutorial)
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -807,7 +891,11 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
    """
    Feature: MindDataset
    Description: Test basic read on NLP MindDataset (tutorial)
    Expectation: Output is equal to the expected output
    """
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    data_set = ds.MindDataset(file_name + "0", None, num_readers)
@ -836,7 +924,11 @@ def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset (tutorial) with 5 epochs
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -853,7 +945,11 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file):
def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset (tutorial) with 5 epochs after Batch op
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -882,7 +978,11 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_
def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with no columns_list
    Expectation: Output is equal to the expected output
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    data_set = ds.MindDataset(file_name + "0")
    assert data_set.get_dataset_size() == 10
@ -903,7 +1003,11 @@ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file):
def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset after Repeat op is applied on the dataset
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -1117,6 +1221,11 @@ def inputs(vectors, maxlen=50):
def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
    """
    Feature: MindDataset
    Description: Test write multiple bytes and arrays using FileWriter and read them back by MindDataset
    Expectation: Output is equal to the expected output
    """
    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    try:
        if os.path.exists("{}".format(mindrecord_file_name)):
@ -1373,6 +1482,11 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
def test_write_with_multi_bytes_and_MindDataset():
    """
    Feature: MindDataset
    Description: Test write multiple bytes using FileWriter and read them back by MindDataset
    Expectation: Output is equal to the expected output
    """
    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    try:
        data = [{"file_name": "001.jpg", "label": 43,
@ -1554,6 +1668,11 @@ def test_write_with_multi_bytes_and_MindDataset():
def test_write_with_multi_array_and_MindDataset():
    """
    Feature: MindDataset
    Description: Test write multiple arrays using FileWriter and read them back by MindDataset
    Expectation: Output is equal to the expected output
    """
    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    try:
        data = [{"source_sos_ids": np.array([1, 2, 3, 4, 5], dtype=np.int64),
@ -1757,6 +1876,11 @@ def test_write_with_multi_array_and_MindDataset():
def test_numpy_generic():
    """
    Feature: MindDataset
    Description: Test write NumPy generic data types using FileWriter and read them back by MindDataset
    Expectation: Output is equal to the expected output
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
@ -1804,6 +1928,12 @@ def test_numpy_generic():
def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset():
    """
    Feature: MindDataset
    Description: Test write float32, float64, array of float32, and array of float64 using
        FileWriter and read them back by MindDataset
    Expectation: Output is equal to the expected output
    """
    mindrecord_file_name = "test_write_with_float32_float64_float32_array_float64_array_and_MindDataset.mindrecord"
    try:
        data = [{"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32),
@ -1996,7 +2126,13 @@ def create_multi_mindrecord_files():
os.remove("{}".format(filename)) os.remove("{}".format(filename))
os.remove("{}.db".format(filename)) os.remove("{}.db".format(filename))
def test_shuffle_with_global_infile_files(create_multi_mindrecord_files): def test_shuffle_with_global_infile_files(create_multi_mindrecord_files):
"""
Feature: MindDataset
Description: Test without and with shuffle args for MindDataset
Expectation: Output is equal to the expected output
"""
ds.config.set_seed(1) ds.config.set_seed(1)
datas_all = [] datas_all = []
index = 0 index = 0
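The shuffle argument probed here is an enum, and each value trades randomness against file locality differently. A sketch of the three modes, assuming two existing MindRecord files (names hypothetical):

import mindspore.dataset as ds

files = ["part0.mindrecord", "part1.mindrecord"]
# GLOBAL: shuffle all samples across all files.
global_ds = ds.MindDataset(files, shuffle=ds.Shuffle.GLOBAL)
# FILES: shuffle the file order, keep samples within each file in order.
files_ds = ds.MindDataset(files, shuffle=ds.Shuffle.FILES)
# INFILE: keep the file order, shuffle samples within each file.
infile_ds = ds.MindDataset(files, shuffle=ds.Shuffle.INFILE)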
@ -2233,7 +2369,13 @@ def test_shuffle_with_global_infile_files(create_multi_mindrecord_files):
        shard_count += 1
    assert origin_index != current_index


def test_distributed_shuffle_with_global_infile_files(create_multi_mindrecord_files):
    """
    Feature: MindDataset
    Description: Test distributed MindDataset (with num_shards and shard_id) without and with the shuffle argument
    Expectation: Output is equal to the expected output
    """
    ds.config.set_seed(1)
    datas_all = []
    datas_all_samples = []
@ -2425,7 +2567,14 @@ def test_distributed_shuffle_with_global_infile_files(create_multi_mindrecord_fi
        shard_count += 1
    assert origin_index != current_index


def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files):
    """
    Feature: MindDataset
    Description: Test distributed MindDataset (with num_shards and shard_id)
        without and with the shuffle argument under multiple epochs
    Expectation: Output is equal to the expected output
    """
    ds.config.set_seed(1)
    datas_all = []
    datas_all_samples = []
@ -2588,8 +2737,13 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files):
    assert datas_epoch2 not in (datas_epoch1, datas_epoch3)
    assert datas_epoch3 not in (datas_epoch2, datas_epoch1)


def test_field_is_null_numpy():
    """
    Feature: MindDataset
    Description: Test MindDataset when the field array_d is null
    Expectation: Output is equal to the expected output
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
@ -2655,8 +2809,13 @@ def test_field_is_null_numpy():
os.remove("{}".format(x)) os.remove("{}".format(x))
os.remove("{}.db".format(x)) os.remove("{}.db".format(x))
def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file): def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file):
"""test for loop dataset iterator""" """
Feature: MindDataset
Description: Test for loop for iterator based on MindDataset
Expectation: Output is equal to the expected output
"""
data = [] data = []
for row_id in range(16): for row_id in range(16):
data.append({ data.append({
View File
@ -74,7 +74,11 @@ def create_diff_page_size_cv_mindrecord(file_name, files_num):
def test_cv_lack_json():
    """
    Feature: MindDataset
    Description: Test MindDataset using a json file that does not exist
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    columns_list = ["data", "file_name", "label"]
@ -87,7 +91,11 @@ def test_cv_lack_json():
def test_cv_lack_mindrecord():
    """
    Feature: MindDataset
    Description: Test MindDataset using a mindrecord file that does not exist or has no read permission
    Expectation: Exception is raised as expected
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    with pytest.raises(Exception, match="does not exist or permission denied"):
@ -95,6 +103,11 @@ def test_cv_lack_mindrecord():
def test_invalid_mindrecord():
    """
    Feature: MindDataset
    Description: Test MindDataset using an invalid file (size of mindrecord file header is larger than the upper limit)
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    with open(file_name, 'w') as f:
        f.write('just for test')
@ -109,6 +122,11 @@ def test_invalid_mindrecord():
def test_minddataset_lack_db():
    """
    Feature: MindDataset
    Description: Test MindDataset without the .db files
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    os.remove("{}.db".format(file_name))
@ -140,6 +158,11 @@ def test_cv_minddataset_pk_sample_error_class_column():
def test_cv_minddataset_pk_sample_exclusive_shuffle():
    """
    Feature: MindDataset
    Description: Test MindDataset by specifying sampler and shuffle at the same time
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    columns_list = ["data", "file_name", "label"]
@ -156,6 +179,11 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle():
def test_cv_minddataset_reader_different_schema():
    """
    Feature: MindDataset
    Description: Test MindDataset by including a file whose schema differs from the others
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_1 = file_name + '_1'
    create_cv_mindrecord(file_name, 1)
@ -177,6 +205,11 @@ def test_cv_minddataset_reader_different_schema():
def test_cv_minddataset_reader_different_page_size():
    """
    Feature: MindDataset
    Description: Test MindDataset where one of the files has a different page size
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_1 = file_name + '_1'
    create_cv_mindrecord(file_name, 1)
@ -199,6 +232,11 @@ def test_cv_minddataset_reader_different_page_size():
def test_minddataset_invalidate_num_shards():
    """
    Feature: MindDataset
    Description: Test MindDataset where num_shards is invalid
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    columns_list = ["data", "label"]
@ -222,6 +260,11 @@ def test_minddataset_invalidate_num_shards():
def test_minddataset_invalidate_shard_id():
    """
    Feature: MindDataset
    Description: Test MindDataset where shard_id is invalid
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    columns_list = ["data", "label"]
@ -245,6 +288,11 @@ def test_minddataset_invalidate_shard_id():
def test_minddataset_shard_id_bigger_than_num_shard():
    """
    Feature: MindDataset
    Description: Test MindDataset where shard_id is larger than num_shards
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    columns_list = ["data", "label"]
@ -282,7 +330,11 @@ def test_minddataset_shard_id_bigger_than_num_shard():
def test_cv_minddataset_partition_num_samples_equals_0():
    """
    Feature: MindDataset
    Description: Test MindDataset where num_samples is invalid
    Expectation: Exception is raised as expected
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(file_name, 1)
    columns_list = ["data", "label"]
@ -312,8 +364,11 @@ def test_cv_minddataset_partition_num_samples_equals_0():
def test_mindrecord_exception():
    """
    Feature: MindDataset
    Description: Test MindDataset with a mapped function that raises an Exception and prints the error info
    Expectation: Exception is raised as expected
    """
    def exception_func(item):
        raise Exception("Error occur!")
View File
@ -1,4 +1,4 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -107,7 +107,11 @@ def add_and_remove_nlp_file():
def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with padded_sample
    Expectation: Output is equal to the expected output
    """
    columns_list = ["label", "file_name", "data"]
    data = get_data(CV_DIR_NAME)
@ -135,7 +139,11 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
    assert num_iter == 15


def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with padded_sample whose file_name field requires a type cast
    Expectation: Output is equal to the expected output
    """
    columns_list = ["label", "file_name", "data"]
    data = get_data(CV_DIR_NAME)
@ -164,7 +172,11 @@ def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_
def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with padded_sample and partition (num_shards and shard_id)
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
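padded_sample appends copies of a caller-supplied fake row so the sample count divides evenly across shards. A sketch of the call shape (values illustrative; shuffle must be False and columns_list is required in this mode):

import mindspore.dataset as ds

padded_sample = {"data": bytes(10), "file_name": "padded.jpg", "label": -1}
# num_padded copies of padded_sample are appended so that
# (real rows + num_padded) is divisible by num_shards.
data_set = ds.MindDataset("sample.mindrecord",
                          columns_list=["data", "file_name", "label"],
                          padded_sample=padded_sample, num_padded=2,
                          num_shards=4, shard_id=0, shuffle=False)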
@ -205,7 +217,12 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file):
def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with padded_sample and partition (num_shards and shard_id),
        performed under multiple epochs
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
@ -278,7 +295,12 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f
def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with padded_sample and partition (num_shards and shard_id),
        where the padded total is not divisible by num_shards
    Expectation: Error is raised as expected
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
@ -305,6 +327,12 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv
def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test get_dataset_size during MindDataset read with padded_sample and partition
        (num_shards and shard_id), where the padded total is not divisible by num_shards
    Expectation: Error is raised as expected
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
@ -328,6 +356,12 @@ def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_a
def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with padded_sample and partition
        (num_shards and shard_id), where padded_sample does not match columns_list
    Expectation: Error is raised as expected
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
@ -355,6 +389,12 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re
def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with padded_sample and partition
        (num_shards and shard_id), where there is no columns_list
    Expectation: Error is raised as expected
    """
    data = get_data(CV_DIR_NAME)
    padded_sample = data[0]
    padded_sample['label'] = -2
@ -380,6 +420,12 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c
def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with padded_sample and partition
        (num_shards and shard_id), where there is no num_padded
    Expectation: Error is raised as expected
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
    padded_sample = data[0]
@ -404,6 +450,12 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv
def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with num_padded and partition
        (num_shards and shard_id), where there is no padded_sample
    Expectation: Error is raised as expected
    """
    columns_list = ["data", "file_name", "label"]
    data = get_data(CV_DIR_NAME)
    padded_sample = data[0]
@ -428,6 +480,11 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov
def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with padded_sample from raw data of the aclImdb dataset
    Expectation: Output is equal to the expected output
    """
    columns_list = ["input_ids", "id", "rating"]
    data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
@ -469,6 +526,11 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file):
def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with padded_sample from raw data of the aclImdb dataset
        under multiple epochs
    Expectation: Output is equal to the expected output
    """
    columns_list = ["input_ids", "id", "rating"]
    data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
@ -535,6 +597,12 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_
def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with padded_sample from raw data of the aclImdb dataset,
        checking the whole result per epoch to ensure there is no reshuffling
    Expectation: Output is equal to the expected output
    """
    columns_list = ["input_ids", "id", "rating"]
    padded_sample = {}
View File
@ -1,4 +1,4 @@
# Copyright 2019-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -62,7 +62,11 @@ def add_and_remove_cv_file():
def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler without any columns_list in the dataset
    Expectation: Output is equal to the expected output
    """
    num_readers = 4
    sampler = ds.PKSampler(2)
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -82,7 +86,11 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with PKSampler
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(2)
@ -105,7 +113,11 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler with shuffle=True
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(3, None, True)
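PKSampler's positional arguments, as used above, are (num_val, num_class, shuffle, class_column, num_samples): take num_val samples from each class found in class_column. A commented sketch (file name hypothetical):

import mindspore.dataset as ds

# Three samples per label value, with the visit order shuffled.
sampler = ds.PKSampler(3, None, True)
data_set = ds.MindDataset("sample.mindrecord",
                          columns_list=["data", "file_name", "label"],
                          num_parallel_workers=4, sampler=sampler)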
@ -127,7 +139,12 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler with shuffle=True and
        num_samples larger than get_dataset_size
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(3, None, True, 'label', 5)
@ -149,7 +166,12 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler with shuffle=True and
        num_samples larger than get_dataset_size
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(3, None, True, 'label', 10)
@ -171,7 +193,11 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler with shuffle=True and num_val that is out of range
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(5, None, True)
@ -192,7 +218,12 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler with shuffle=True, num_val that is out of range, and
        num_samples larger than get_dataset_size
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(5, None, True, 'label', 20)
@ -213,7 +244,12 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file):
def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with PKSampler with shuffle=True, num_val that is out of range, and
        num_samples that is equal to get_dataset_size
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(5, None, True, 'label', 10)
@ -234,7 +270,11 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file):
def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with SubsetRandomSampler
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -259,7 +299,11 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file):
def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with SubsetRandomSampler with duplicate indices
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 2, 5, 7, 9]
@ -284,7 +328,11 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file):
def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with SubsetRandomSampler with empty indices
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = []
@ -309,7 +357,11 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with SubsetRandomSampler with indices that are out of range
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 4, 11, 13]
@ -334,6 +386,11 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file
def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with SubsetRandomSampler with negative indices
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 4, -1, -2]
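All of these cases only vary the indices handed to SubsetRandomSampler, which draws exactly that subset in random order; the resulting dataset size follows the index list, not the file. Sketch (file name hypothetical):

import mindspore.dataset as ds

indices = [1, 2, 4, 7]  # row positions to draw, visited in random order
sampler = ds.SubsetRandomSampler(indices)
data_set = ds.MindDataset("sample.mindrecord",
                          columns_list=["data", "file_name", "label"],
                          num_parallel_workers=4, sampler=sampler)
assert data_set.get_dataset_size() == len(indices)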
@ -358,6 +415,11 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with RandomSampler
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
@ -384,6 +446,11 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with RandomSampler followed by Repeat op
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -419,6 +486,11 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with RandomSampler with replacement=True
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
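RandomSampler's replacement flag decides whether a row may be drawn more than once; with replacement=True and num_samples set, duplicates are legitimate. Sketch (file name hypothetical):

import mindspore.dataset as ds

sampler = ds.RandomSampler(replacement=True, num_samples=5)
data_set = ds.MindDataset("sample.mindrecord",
                          columns_list=["data", "file_name", "label"],
                          num_parallel_workers=4, sampler=sampler)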
@ -441,6 +513,11 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with RandomSampler with replacement=False and num_samples <= dataset size
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -463,6 +540,11 @@ def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_fil
def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with RandomSampler with replacement=False and num_samples > dataset size
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -485,6 +567,11 @@ def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_fil
def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset with SequentialSampler
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
@ -510,6 +597,11 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file):
def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with SequentialSampler with an offset on the starting index
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
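SequentialSampler reads rows in storage order; its two arguments shift the window rather than reorder it. Sketch (file name hypothetical):

import mindspore.dataset as ds

# Start at row 2 and take 4 rows, in order and unshuffled.
sampler = ds.SequentialSampler(start_index=2, num_samples=4)
data_set = ds.MindDataset("sample.mindrecord",
                          columns_list=["data", "file_name", "label"],
                          num_parallel_workers=4, sampler=sampler)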
@ -536,6 +628,12 @@ def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file):
def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with SequentialSampler with an offset on the starting index and
        num_samples > dataset size
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
@ -562,6 +660,11 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file):
def test_cv_minddataset_split_basic(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test basic read on MindDataset after Split op is applied
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
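split partitions one dataset object into several; the sizes may be absolute row counts or percentages summing to 1, and randomize=False keeps storage order. Sketch (file name hypothetical):

import mindspore.dataset as ds

data_set = ds.MindDataset("sample.mindrecord", shuffle=False)
train_set, test_set = data_set.split([0.8, 0.2], randomize=False)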
@ -599,6 +702,11 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file):
def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset after Split op is applied using exact percentages
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
@ -636,6 +744,11 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file):
def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset after Split op is applied using fuzzy percentages
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
@ -673,6 +786,11 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
def test_cv_minddataset_split_deterministic(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset after deterministic Split op is applied
    Expectation: Output is equal to the expected output
    """
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
@ -714,6 +832,11 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file):
def test_cv_minddataset_split_sharding(add_and_remove_cv_file):
    """
    Feature: MindDataset
    Description: Test read on MindDataset with DistributedSampler after deterministic Split op is applied
    Expectation: Output is equal to the expected output
    """
    data = get_data(CV_DIR_NAME, True)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
View File
@ -1,4 +1,4 @@
# Copyright 2019-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -26,6 +26,11 @@ MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8"
def test_on_tokenized_line():
    """
    Feature: Python text.Vocab class
    Description: Test Lookup op on a tokenized line using JiebaTokenizer with special_tokens
    Expectation: Output is equal to the expected output
    """
    data = ds.TextFileDataset("../data/dataset/testVocab/lines.txt", shuffle=False)
    jieba_op = text.JiebaTokenizer(HMM_FILE, MP_FILE, mode=text.JiebaMode.MP)
    with open(VOCAB_FILE, 'r') as f:
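Vocab maps tokens to ids and Lookup applies the mapping as a dataset op; special_tokens are prepended when special_first=True. A minimal sketch independent of the Jieba fixtures above (tokens invented):

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.text as text

vocab = text.Vocab.from_list(["home", "is", "behind"],
                             special_tokens=["<pad>", "<unk>"], special_first=True)

def gen():
    yield (np.array(["home", "is", "nowhere"]),)

data = ds.GeneratorDataset(gen, ["text"])
# Tokens absent from the vocab map to the id of "<unk>".
data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])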
@ -43,6 +48,11 @@ def test_on_tokenized_line():
def test_on_tokenized_line_with_no_special_tokens():
    """
    Feature: Python text.Vocab class
    Description: Test Lookup op on a tokenized line using JiebaTokenizer without special_tokens
    Expectation: Output is equal to the expected output
    """
    data = ds.TextFileDataset("../data/dataset/testVocab/lines.txt", shuffle=False)
    jieba_op = text.JiebaTokenizer(HMM_FILE, MP_FILE, mode=text.JiebaMode.MP)
    with open(VOCAB_FILE, 'r') as f:
View File
@ -1,4 +1,4 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -22,6 +22,11 @@ from mindspore import context
DATA_DIR = "../data/dataset/testVOC2012"


def test_noop_pserver():
    """
    Feature: No-op mode
    Description: Test no-op mode support when the MS_ROLE environment variable is MS_PSERVER
    Expectation: Runs successfully
    """
    os.environ['MS_ROLE'] = 'MS_PSERVER'
    context.set_ps_context(enable_ps=True)
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
@ -34,6 +39,11 @@ def test_noop_pserver():
def test_noop_sched():
    """
    Feature: No-op mode
    Description: Test no-op mode support when the MS_ROLE environment variable is MS_SCHED
    Expectation: Runs successfully
    """
    os.environ['MS_ROLE'] = 'MS_SCHED'
    context.set_ps_context(enable_ps=True)
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
View File
@ -1,4 +1,4 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -21,6 +21,11 @@ import mindspore.dataset as ds
# map dataset with columns order arguments should produce a ProjectOp over MapOp
# This test does not utilize the compiling passes at this time.
def test_map_reorder0():
    """
    Feature: Map op
    Description: Test Map op by applying the operation lambda x: x on a GeneratorDataset
    Expectation: Output is equal to the expected output
    """
    def generator_mc(maxid=1):
        for _ in range(maxid):
            yield (np.array([0]), np.array([1]))
@ -39,6 +44,11 @@ def test_map_reorder0():
# map dataset with columns order arguments should produce a ProjectOp over MapOp # map dataset with columns order arguments should produce a ProjectOp over MapOp
# This test does not utilize the compiling passes at this time. # This test does not utilize the compiling passes at this time.
def test_map_reorder1(): def test_map_reorder1():
"""
Feature: Map op
Description: Test Map op on 2 mapped GeneratorDatasets that are zipped
Expectation: Output is equal to the expected output
"""
def generator_mc(maxid=1): def generator_mc(maxid=1):
for _ in range(maxid): for _ in range(maxid):
yield (np.array([0]), np.array([1]), np.array([2])) yield (np.array([0]), np.array([1]), np.array([2]))
@ -59,6 +69,11 @@ def test_map_reorder1():
# TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp. # TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp.
# This test does not utilize the compiling passes at this time. # This test does not utilize the compiling passes at this time.
def test_shuffle(): def test_shuffle():
"""
Feature: Shuffle op
Description: Test one dataset with Shuffle.GLOBAL with another dataset with Shuffle.FILES followed by shuffle op
Expectation: Both datasets should be equal
"""
FILES = ["../data/dataset/testTFTestAllTypes/test.data"] FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json" SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
@ -98,4 +113,4 @@ def test_shuffle():
if __name__ == "__main__": if __name__ == "__main__":
test_map_reorder0() test_map_reorder0()
test_map_reorder1() test_map_reorder1()
test_global_shuffle() test_shuffle()

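A sketch of the equivalence test_shuffle checks: a global shuffle at the source should match a file-level shuffle followed by a shuffle op when both pipelines use the same seed. The seed and buffer size here are assumptions.

import mindspore.dataset as ds

FILES = ["../data/dataset/testTFTestAllTypes/test.data"]

ds.config.set_seed(1)
data1 = ds.TFRecordDataset(FILES, shuffle=ds.Shuffle.GLOBAL)

ds.config.set_seed(1)
data2 = ds.TFRecordDataset(FILES, shuffle=ds.Shuffle.FILES)
data2 = data2.shuffle(10000)  # buffer size chosen for illustration

for row1, row2 in zip(data1.create_tuple_iterator(output_numpy=True),
                      data2.create_tuple_iterator(output_numpy=True)):
    assert all((c1 == c2).all() for c1, c2 in zip(row1, row2))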
View File

@@ -60,6 +60,11 @@ def gen_var_cols_2d(num):
 def test_batch_padding_01():
+    """
+    Feature: Batch Padding
+    Description: Test batch padding where input_shape=[x] and output_shape=[y] in which y > x
+    Expectation: Output is equal to the expected output
+    """
     data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
     data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)})
     data1 = data1.repeat(2)
@@ -69,6 +74,12 @@ def test_batch_padding_01():
 def test_batch_padding_02():
+    """
+    Feature: Batch Padding
+    Description: Test batch padding where padding in one dimension and truncate in the other, in which
+        input_shape=[x1,x2] and output_shape=[y1,y2] and y1 > x1 and y2 < x2
+    Expectation: Output is equal to the expected output
+    """
     data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
     data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)})
     data1 = data1.repeat(2)
@@ -78,6 +89,11 @@ def test_batch_padding_02():
 def test_batch_padding_03():
+    """
+    Feature: Batch Padding
+    Description: Test batch padding using automatic padding for a specific column
+    Expectation: Output is equal to the expected output
+    """
     data1 = ds.GeneratorDataset((lambda: gen_var_col(4)), ["col"])
     data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)})  # pad automatically
     data1 = data1.repeat(2)
@@ -91,6 +107,11 @@ def test_batch_padding_03():
 def test_batch_padding_04():
+    """
+    Feature: Batch Padding
+    Description: Test batch padding using default setting for all columns
+    Expectation: Output is equal to the expected output
+    """
     data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"])
     data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={})  # pad automatically
     data1 = data1.repeat(2)
@@ -100,6 +121,11 @@ def test_batch_padding_04():
 def test_batch_padding_05():
+    """
+    Feature: Batch Padding
+    Description: Test batch padding where None is in different places
+    Expectation: Output is equal to the expected output
+    """
     data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"])
     data1 = data1.batch(batch_size=3, drop_remainder=False,
                         pad_info={"col2": ([2, None], -2), "col1": (None, -1)})  # pad automatically
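A minimal sketch of the pad_info semantics these tests cover: pad each row of "col" to shape [3] with fill value -1 at batch time. The generator values are illustrative.

import numpy as np
import mindspore.dataset as ds

def gen():
    yield (np.array([1], dtype=np.int64),)
    yield (np.array([2, 3], dtype=np.int64),)

data = ds.GeneratorDataset(gen, ["col"])
data = data.batch(batch_size=2, pad_info={"col": ([3], -1)})  # pad to [3] with -1
for (col,) in data.create_tuple_iterator(output_numpy=True):
    print(col)  # [[ 1 -1 -1]
                #  [ 2  3 -1]]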
View File

@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,6 +35,11 @@ def pad_compare(array, pad_shape, pad_value, res):
 def test_pad_end_basics():
+    """
+    Feature: PadEnd op
+    Description: Test PadEnd op basic usage with an array of ints
+    Expectation: Output is equal to the expected output
+    """
     pad_compare([1, 2], [3], -1, [1, 2, -1])
     pad_compare([1, 2, 3], [3], -1, [1, 2, 3])
     pad_compare([1, 2, 3], [2], -1, [1, 2])
@@ -42,6 +47,11 @@ def test_pad_end_basics():
 def test_pad_end_str():
+    """
+    Feature: PadEnd op
+    Description: Test PadEnd op basic usage with an array of strings
+    Expectation: Output is equal to the expected output
+    """
     pad_compare([b"1", b"2"], [3], b"-1", [b"1", b"2", b"-1"])
     pad_compare([b"1", b"2", b"3"], [3], b"-1", [b"1", b"2", b"3"])
     pad_compare([b"1", b"2", b"3"], [2], b"-1", [b"1", b"2"])
@@ -49,6 +59,11 @@ def test_pad_end_str():
 def test_pad_end_exceptions():
+    """
+    Feature: PadEnd op
+    Description: Test PadEnd op with invalid inputs
+    Expectation: Correct error is raised as expected
+    """
     with pytest.raises(RuntimeError) as info:
         pad_compare([1, 2], [3], "-1", [])
     assert "pad_value and item of dataset are not of the same type" in str(info.value)
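The pad_compare helper above presumably maps a PadEnd op over a one-row dataset; a standalone sketch of the same behavior, with illustrative values:

import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as ops

data = ds.NumpySlicesDataset([[1, 2]], column_names=["col"])
# Pad the 1D tensor out to length 4 with -1; a pad_shape shorter than the input truncates instead.
data = data.map(operations=ops.PadEnd(pad_shape=[4], pad_value=-1), input_columns=["col"])
for (col,) in data.create_tuple_iterator(output_numpy=True):
    print(col)  # [ 1  2 -1 -1]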
View File

@@ -1,3 +1,18 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
 from io import BytesIO
 import copy
 import os
@@ -39,13 +54,18 @@ def generator_30():
 def test_TFRecord_Padded():
-    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
-    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
+    """
+    Feature: PaddedDataset
+    Description: Test padding a TFRecordDataset with a PaddedDataset
+    Expectation: Output is equal to the expected output
+    """
+    data_dir = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
+    schema_dir = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
     result_list = [[159109, 2], [192607, 3], [179251, 4], [1, 5]]
     verify_list = []
     shard_num = 4
     for i in range(shard_num):
-        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
+        data = ds.TFRecordDataset(data_dir, schema_dir, columns_list=["image"],
                                   shuffle=False, shard_equal_rows=True)
         padded_samples = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(2, np.uint8)},
@@ -64,6 +84,11 @@ def test_TFRecord_Padded():
 def test_GeneratorDataSet_Padded():
+    """
+    Feature: PaddedDataset
+    Description: Test padding GeneratorDataset with another GeneratorDataset
+    Expectation: Output is equal to the expected output
+    """
     result_list = []
     for i in range(10):
         tem_list = []
@@ -88,6 +113,11 @@ def test_GeneratorDataSet_Padded():
 def test_Reapeat_afterPadded():
+    """
+    Feature: PaddedDataset
+    Description: Test padding PaddedDataset with another PaddedDataset
+    Expectation: Output is equal to the expected output
+    """
     result_list = [1, 3, 5, 7]
     verify_list = []
@@ -112,6 +142,11 @@ def test_Reapeat_afterPadded():
 def test_bath_afterPadded():
+    """
+    Feature: PaddedDataset
+    Description: Test padding PaddedDataset with another PaddedDataset followed by batch op
+    Expectation: Output is equal to the expected output
+    """
     data1 = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)},
              {'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)},
              {'image': np.zeros(1, np.uint8)}]
@@ -130,6 +165,11 @@ def test_bath_afterPadded():
 def test_Unevenly_distributed():
+    """
+    Feature: PaddedDataset
+    Description: Test padding PaddedDataset with another PaddedDataset that is unevenly distributed
+    Expectation: Output is equal to the expected output
+    """
     result_list = [[1, 4, 7], [2, 5, 8], [3, 6]]
     verify_list = []
@@ -156,6 +196,11 @@ def test_Unevenly_distributed():
 def test_three_datasets_connected():
+    """
+    Feature: PaddedDataset
+    Description: Test padding 3 connected GeneratorDatasets
+    Expectation: Output is equal to the expected output
+    """
     result_list = []
     for i in range(10):
         tem_list = []
@@ -182,6 +227,11 @@ def test_three_datasets_connected():
 def test_raise_error():
+    """
+    Feature: PaddedDataset
+    Description: Test padding a PaddedDataset after a batch op with a PaddedDataset, then apply a sampler op
+    Expectation: Correct error is raised as expected
+    """
     data1 = [{'image': np.zeros(0, np.uint8)}, {'image': np.zeros(0, np.uint8)},
              {'image': np.zeros(0, np.uint8)}, {'image': np.zeros(0, np.uint8)},
              {'image': np.zeros(0, np.uint8)}]
@@ -214,8 +264,13 @@ def test_raise_error():
     assert excinfo.type == 'ValueError'
 def test_imagefolder_error():
-    DATA_DIR = "../data/dataset/testPK/data"
-    data = ds.ImageFolderDataset(DATA_DIR, num_samples=14)
+    """
+    Feature: PaddedDataset
+    Description: Test padding an ImageFolderDataset (created with num_samples) with a PaddedDataset
+    Expectation: Error is raised as expected
+    """
+    data_dir = "../data/dataset/testPK/data"
+    data = ds.ImageFolderDataset(data_dir, num_samples=14)
     data1 = [{'image': np.zeros(1, np.uint8), 'label': np.array(0, np.int32)},
              {'image': np.zeros(2, np.uint8), 'label': np.array(1, np.int32)},
@@ -232,8 +287,13 @@ def test_imagefolder_error():
     assert excinfo.type == 'ValueError'
 def test_imagefolder_padded():
-    DATA_DIR = "../data/dataset/testPK/data"
-    data = ds.ImageFolderDataset(DATA_DIR)
+    """
+    Feature: PaddedDataset
+    Description: Test padding an ImageFolderDataset (created without num_samples) with a PaddedDataset
+    Expectation: Output is equal to the expected output
+    """
+    data_dir = "../data/dataset/testPK/data"
+    data = ds.ImageFolderDataset(data_dir)
     data1 = [{'image': np.zeros(1, np.uint8), 'label': np.array(0, np.int32)},
              {'image': np.zeros(2, np.uint8), 'label': np.array(1, np.int32)},
@@ -256,11 +316,16 @@ def test_imagefolder_padded():
 def test_imagefolder_padded_with_decode():
+    """
+    Feature: PaddedDataset
+    Description: Test padding an ImageFolderDataset with a PaddedDataset followed by a Decode op
+    Expectation: Output is equal to the expected output
+    """
     num_shards = 5
     count = 0
     for shard_id in range(num_shards):
-        DATA_DIR = "../data/dataset/testPK/data"
-        data = ds.ImageFolderDataset(DATA_DIR)
+        data_dir = "../data/dataset/testPK/data"
+        data = ds.ImageFolderDataset(data_dir)
         white_io = BytesIO()
         Image.new('RGB', (224, 224), (255, 255, 255)).save(white_io, 'JPEG')
@@ -285,11 +350,16 @@ def test_imagefolder_padded_with_decode():
 def test_imagefolder_padded_with_decode_and_get_dataset_size():
+    """
+    Feature: PaddedDataset
+    Description: Test padding an ImageFolderDataset with a PaddedDataset followed by get_dataset_size and a Decode op
+    Expectation: Output is equal to the expected output
+    """
     num_shards = 5
     count = 0
     for shard_id in range(num_shards):
-        DATA_DIR = "../data/dataset/testPK/data"
-        data = ds.ImageFolderDataset(DATA_DIR)
+        data_dir = "../data/dataset/testPK/data"
+        data = ds.ImageFolderDataset(data_dir)
         white_io = BytesIO()
         Image.new('RGB', (224, 224), (255, 255, 255)).save(white_io, 'JPEG')
@@ -316,6 +386,12 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size():
 def test_more_shard_padded():
+    """
+    Feature: PaddedDataset
+    Description: Test padding a GeneratorDataset with another GeneratorDataset and
+        a PaddedDataset with another PaddedDataset when a larger num_shards is used
+    Expectation: Output is equal to the expected output
+    """
     result_list = []
     for i in range(8):
         result_list.append(1)
@@ -429,6 +505,11 @@ def add_and_remove_cv_file():
 def test_Mindrecord_Padded(remove_mindrecord_file):
+    """
+    Feature: PaddedDataset
+    Description: Test padding a MindDataset with a PaddedDataset
+    Expectation: Output is equal to the expected output
+    """
     result_list = []
     verify_list = [[1, 2], [3, 4], [5, 11], [6, 12], [7, 13], [8, 14], [9], [10]]
     num_readers = 4
@@ -453,7 +534,9 @@ def test_Mindrecord_Padded(remove_mindrecord_file):
 def test_clue_padded_and_skip_with_0_samples():
     """
-    Test num_samples param of CLUE dataset
+    Feature: PaddedDataset
+    Description: Test padding a CLUEDataset with a PaddedDataset, with and without samples
+    Expectation: Output is equal to the expected output; when the dataset has no samples, an error is raised
     """
     TRAIN_FILE = '../data/dataset/testCLUE/afqmc/train.json'
@@ -494,6 +577,11 @@ def test_clue_padded_and_skip_with_0_samples():
 def test_celeba_padded():
+    """
+    Feature: PaddedDataset
+    Description: Test padding a CelebADataset with a PaddedDataset
+    Expectation: Output is equal to the expected output
+    """
     data = ds.CelebADataset("../data/dataset/testCelebAData/")
     padded_samples = [{'image': np.zeros(1, np.uint8), 'attr': np.zeros(1, np.uint32)}]
@@ -517,6 +605,6 @@ if __name__ == '__main__':
     test_Unevenly_distributed()
     test_three_datasets_connected()
     test_raise_error()
-    test_imagefolden_padded()
+    test_imagefolder_padded()
     test_more_shard_padded()
     test_Mindrecord_Padded(add_and_remove_cv_file)
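The recurring pattern in this file, reduced to a minimal sketch: concatenate a PaddedDataset onto another dataset so that rows divide evenly across shards. Sample shapes and the shard count are illustrative.

import numpy as np
import mindspore.dataset as ds

padded_samples = [{'image': np.zeros(1, np.uint8), 'label': np.array(0, np.int32)},
                  {'image': np.zeros(2, np.uint8), 'label': np.array(1, np.int32)}]
padded_ds = ds.PaddedDataset(padded_samples)

data = ds.ImageFolderDataset("../data/dataset/testPK/data")  # path reused from the tests
concat_ds = data + padded_ds  # concat, then shard with a distributed sampler
concat_ds.use_sampler(ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False))
print(concat_ds.get_dataset_size())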
View File

@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,6 +31,11 @@ def compare(in1, in2, length, out1, out2):
 def test_callable():
+    """
+    Feature: TruncateSequencePair op
+    Description: Test TruncateSequencePair op using an array of arrays or multiple arrays as the input
+    Expectation: Output is equal to the expected output
+    """
     op = text.TruncateSequencePair(3)
     data = [["1", "2", "3"], ["4", "5"]]
     result_text = op(*data)
@@ -42,6 +47,11 @@ def test_callable():
 def test_basics():
+    """
+    Feature: TruncateSequencePair op
+    Description: Test TruncateSequencePair op basic usage
+    Expectation: Output is equal to the expected output
+    """
     compare(in1=[1, 2, 3], in2=[4, 5], length=4, out1=[1, 2], out2=[4, 5])
     compare(in1=[1, 2], in2=[4, 5], length=4, out1=[1, 2], out2=[4, 5])
     compare(in1=[1], in2=[4], length=4, out1=[1], out2=[4])
@@ -50,6 +60,11 @@ def test_basics():
 def test_basics_odd():
+    """
+    Feature: TruncateSequencePair op
+    Description: Test TruncateSequencePair op basic usage when the length is an odd number > 1
+    Expectation: Output is equal to the expected output
+    """
     compare(in1=[1, 2, 3], in2=[4, 5], length=3, out1=[1, 2], out2=[4])
     compare(in1=[1, 2], in2=[4, 5], length=3, out1=[1, 2], out2=[4])
     compare(in1=[1], in2=[4], length=5, out1=[1], out2=[4])
@@ -58,6 +73,11 @@ def test_basics_odd():
 def test_basics_str():
+    """
+    Feature: TruncateSequencePair op
+    Description: Test TruncateSequencePair op basic usage when the inputs are arrays of strings
+    Expectation: Output is equal to the expected output
+    """
     compare(in1=[b"1", b"2", b"3"], in2=[4, 5], length=4, out1=[b"1", b"2"], out2=[4, 5])
     compare(in1=[b"1", b"2"], in2=[b"4", b"5"], length=4, out1=[b"1", b"2"], out2=[b"4", b"5"])
     compare(in1=[b"1"], in2=[4], length=4, out1=[b"1"], out2=[4])
@@ -66,6 +86,11 @@ def test_basics_str():
 def test_exceptions():
+    """
+    Feature: TruncateSequencePair op
+    Description: Test TruncateSequencePair op with length=1
+    Expectation: Output is equal to the expected output
+    """
     compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1], out2=[])
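A sketch of the TruncateSequencePair semantics tested above: trim two sequences so their combined length is at most the given length, removing from the longer sequence first. The eager call form mirrors test_callable.

import mindspore.dataset.text as text

op = text.TruncateSequencePair(3)
out1, out2 = op(["1", "2", "3"], ["4", "5"])
print(out1, out2)  # ['1' '2'] ['4']; combined length capped at 3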
View File

@@ -118,9 +118,10 @@ class TestMinddataProfilingManager:
     def test_profiling_simple_pipeline(self, tmp_path):
         """
-        Generator -> Shuffle -> Batch
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with a simple pipeline (Generator -> Shuffle -> Batch)
+        Expectation: Runs successfully
         """
         source = [(np.array([x]),) for x in range(1024)]
         data1 = ds.GeneratorDataset(source, ["data"])
         data1 = data1.shuffle(64)
@@ -161,11 +162,15 @@ class TestMinddataProfilingManager:
     def test_profiling_complex_pipeline(self, tmp_path):
         """
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with a complex pipeline:
             Generator -> Map ->
                              -> Zip
             TFReader -> Shuffle ->
+        Expectation: Runs successfully
         """
         source = [(np.array([x]),) for x in range(1024)]
         data1 = ds.GeneratorDataset(source, ["gen"])
         data1 = data1.map(operations=[(lambda x: x + 1)], input_columns=["gen"])
@@ -207,12 +212,15 @@ class TestMinddataProfilingManager:
     def test_profiling_inline_ops_pipeline1(self, tmp_path):
         """
-        Test pipeline with inline ops: Concat and EpochCtrl
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling pipeline with inline ops (Concat and EpochCtrl):
             Generator ->
                          Concat -> EpochCtrl
             Generator ->
+        Expectation: Runs successfully
         """
         # In source1 dataset: Number of rows is 3; its values are 0, 1, 2
         def source1():
             for i in range(3):
@@ -267,10 +275,11 @@ class TestMinddataProfilingManager:
     def test_profiling_inline_ops_pipeline2(self, tmp_path):
         """
-        Test pipeline with many inline ops
-        Generator -> Rename -> Skip -> Repeat -> Take
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling pipeline with many inline ops
+            (Generator -> Rename -> Skip -> Repeat -> Take)
+        Expectation: Runs successfully
         """
         # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9
         def source1():
             for i in range(10):
@@ -314,7 +323,9 @@ class TestMinddataProfilingManager:
     def test_profiling_sampling_interval(self, tmp_path):
         """
-        Test non-default monitor sampling interval
+        Feature: MindData Profiling Manager
+        Description: Test non-default monitor sampling interval
+        Expectation: Runs successfully
         """
         interval_origin = ds.config.get_monitor_sampling_interval()
@@ -349,10 +360,11 @@ class TestMinddataProfilingManager:
     def test_profiling_basic_pipeline(self, tmp_path):
         """
-        Test with this basic pipeline
-        Generator -> Map -> Batch -> Repeat -> EpochCtrl
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling pipeline with basic pipeline
+            (Generator -> Map -> Batch -> Repeat -> EpochCtrl)
+        Expectation: Runs successfully
         """
         def source1():
             for i in range(8000):
                 yield (np.array([i]),)
@@ -402,10 +414,11 @@ class TestMinddataProfilingManager:
     def test_profiling_cifar10_pipeline(self, tmp_path):
         """
-        Test with this common pipeline with Cifar10
-        Cifar10 -> Map -> Map -> Batch -> Repeat
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with common pipeline with Cifar10
+            (Cifar10 -> Map -> Map -> Batch -> Repeat)
+        Expectation: Runs successfully
         """
         # Create this common pipeline
         # Cifar10 -> Map -> Map -> Batch -> Repeat
         DATA_DIR_10 = "../data/dataset/testCifar10Data"
@@ -455,12 +468,13 @@ class TestMinddataProfilingManager:
     def test_profiling_seq_pipelines_epochctrl3(self, tmp_path):
         """
-        Test with these 2 sequential pipelines:
-        1) Generator -> Batch -> EpochCtrl
-        2) Generator -> Batch
-        Note: This is a simplification of the user scenario to use the same pipeline for training and then evaluation.
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with these 2 sequential pipelines
+            1) Generator -> Batch -> EpochCtrl
+            2) Generator -> Batch
+        Note: This is a simplification of the user scenario to use the same pipeline for train and then eval
+        Expectation: Runs successfully
         """
         source = [(np.array([x]),) for x in range(64)]
         data1 = ds.GeneratorDataset(source, ["data"])
         data1 = data1.batch(32)
@@ -510,11 +524,12 @@ class TestMinddataProfilingManager:
     def test_profiling_seq_pipelines_epochctrl2(self, tmp_path):
         """
-        Test with these 2 sequential pipelines:
-        1) Generator -> Batch
-        2) Generator -> Batch -> EpochCtrl
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with these 2 sequential pipelines
+            1) Generator -> Batch
+            2) Generator -> Batch -> EpochCtrl
+        Expectation: Runs successfully
         """
         source = [(np.array([x]),) for x in range(64)]
         data2 = ds.GeneratorDataset(source, ["data"])
         data2 = data2.batch(16)
@@ -564,11 +579,12 @@ class TestMinddataProfilingManager:
     def test_profiling_seq_pipelines_repeat(self, tmp_path):
         """
-        Test with these 2 sequential pipelines:
-        1) Generator -> Batch
-        2) Generator -> Batch -> Repeat
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with these 2 sequential pipelines
+            1) Generator -> Batch
+            2) Generator -> Batch -> Repeat
+        Expectation: Runs successfully
         """
         source = [(np.array([x]),) for x in range(64)]
         data2 = ds.GeneratorDataset(source, ["data"])
         data2 = data2.batch(16)
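A standalone sketch of the config handling in test_profiling_sampling_interval: set a non-default monitor sampling interval and always restore the original value. The 30 ms value is an assumption.

import mindspore.dataset as ds

interval_origin = ds.config.get_monitor_sampling_interval()
try:
    ds.config.set_monitor_sampling_interval(30)  # milliseconds
    assert ds.config.get_monitor_sampling_interval() == 30
finally:
    ds.config.set_monitor_sampling_interval(interval_origin)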
View File

@@ -1,4 +1,4 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -95,7 +95,10 @@ class TestMindDataProfilingStartStop:
     def test_profiling_early_stop(self, tmp_path):
         """
-        Test MindData Profiling with Early Stop; profile for some iterations and then stop profiling
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with early stop; profile for some iterations and then
+            stop profiling
+        Expectation: Runs successfully
         """
         def source1():
             for i in range(8000):
@@ -138,9 +141,10 @@ class TestMindDataProfilingStartStop:
     def test_profiling_delayed_start(self, tmp_path):
         """
-        Test MindData Profiling with Delayed Start; profile for subset of iterations
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with delayed start; profile for a subset of iterations
+        Expectation: Runs successfully
         """
         def source1():
             for i in range(8000):
                 yield (np.array([i]),)
@@ -181,9 +185,10 @@ class TestMindDataProfilingStartStop:
     def test_profiling_multiple_start_stop(self, tmp_path):
         """
-        Test MindData Profiling with Delayed Start and Multiple Start-Stop Sequences
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with delayed start and multiple start-stop sequences
+        Expectation: Runs successfully
         """
         def source1():
             for i in range(8000):
                 yield (np.array([i]),)
@@ -233,7 +238,9 @@ class TestMindDataProfilingStartStop:
     def test_profiling_start_start(self):
         """
-        Test MindData Profiling with Start followed by Start - user error scenario
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with start followed by start
+        Expectation: Error is raised as expected
         """
         # Initialize MindData profiling manager
         self.md_profiler.init()
@@ -252,7 +259,9 @@ class TestMindDataProfilingStartStop:
     def test_profiling_stop_stop(self, tmp_path):
         """
-        Test MindData Profiling with Stop followed by Stop - user warning scenario
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with stop followed by stop
+        Expectation: Warning is produced
         """
         # Initialize MindData profiling manager
         self.md_profiler.init()
@@ -270,7 +279,9 @@ class TestMindDataProfilingStartStop:
     def test_profiling_stop_nostart(self):
         """
-        Test MindData Profiling with Stop without prior Start - user error scenario
+        Feature: MindData Profiling Manager
+        Description: Test MindData profiling with stop without a prior start
+        Expectation: Error is raised as expected
         """
         # Initialize MindData profiling manager
         self.md_profiler.init()
View File

@@ -1,4 +1,4 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
+# Copyright 2019-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -26,6 +26,11 @@ GENERATE_GOLDEN = False
 def test_case_project_single_column():
+    """
+    Feature: Project op
+    Description: Test Project op on a single column
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_sint32"]
     parameters = {"params": {'columns': columns}}
@@ -37,6 +42,11 @@ def test_case_project_single_column():
 def test_case_project_multiple_columns_in_order():
+    """
+    Feature: Project op
+    Description: Test Project op on multiple columns in order
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_sint16", "col_float", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -48,6 +58,11 @@ def test_case_project_multiple_columns_in_order():
 def test_case_project_multiple_columns_out_of_order():
+    """
+    Feature: Project op
+    Description: Test Project op on multiple columns out of order
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -59,6 +74,11 @@ def test_case_project_multiple_columns_out_of_order():
 def test_case_project_map():
+    """
+    Feature: Project op
+    Description: Test Project op followed by a Map op
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -73,6 +93,11 @@ def test_case_project_map():
 def test_case_map_project():
+    """
+    Feature: Project op
+    Description: Test Project op after a Map op
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -88,6 +113,11 @@ def test_case_map_project():
 def test_case_project_between_maps():
+    """
+    Feature: Project op
+    Description: Test Project op between Map ops (Map -> Project -> Map)
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -112,6 +142,11 @@ def test_case_project_between_maps():
 def test_case_project_repeat():
+    """
+    Feature: Project op
+    Description: Test Project op followed by a Repeat op
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -126,6 +161,11 @@ def test_case_project_repeat():
 def test_case_repeat_project():
+    """
+    Feature: Project op
+    Description: Test Project op after a Repeat op
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -141,6 +181,11 @@ def test_case_repeat_project():
 def test_case_map_project_map_project():
+    """
+    Feature: Project op
+    Description: Test Map -> Project -> Map -> Project
+    Expectation: Output is equal to the expected output
+    """
     columns = ["col_3d", "col_sint64", "col_2d"]
     parameters = {"params": {'columns': columns}}
@@ -160,8 +205,11 @@ def test_case_map_project_map_project():
 def test_column_order():
-    """test the output dict has maintained an insertion order."""
+    """
+    Feature: Project op
+    Description: Test Project op where the output dict should maintain the insertion order
+    Expectation: Output is equal to the expected output
+    """
     def gen_3_cols(num):
         for i in range(num):
             yield (np.array([i * 3]), np.array([i * 3 + 1]), np.array([i * 3 + 2]))
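What these Project tests assert, as a minimal sketch: project keeps only the named columns, in the requested order. The path and column names follow the tests' TFRecord schema.

import mindspore.dataset as ds

DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_DIR = "../data/dataset/testTFTestAllTypes/datasetSchema.json"

data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data = data.project(columns=["col_3d", "col_sint64", "col_2d"])
print(data.get_col_names())  # ['col_3d', 'col_sint64', 'col_2d']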
View File

@@ -27,7 +27,9 @@ GENERATE_GOLDEN = False
 def test_case_0():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test 1-1 PyFunc : lambda x : x + x
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1-1 PyFunc : lambda x : x + x")
@@ -46,7 +48,9 @@ def test_case_0():
 def test_case_1():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test 1-n PyFunc : lambda x : (x, x + x)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1-n PyFunc : lambda x : (x , x + x) ")
@@ -69,7 +73,9 @@ def test_case_1():
 def test_case_2():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test n-1 PyFunc : lambda x, y : x + y
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test n-1 PyFunc : lambda x, y : x + y ")
@@ -91,7 +97,9 @@ def test_case_2():
 def test_case_3():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test n-m PyFunc : lambda x, y : (x, x + 1, x + y)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
@@ -117,7 +125,9 @@ def test_case_3():
 def test_case_4():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test parallel n-m PyFunc : lambda x, y : (x, x + 1, x + y)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test Parallel n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
@@ -149,7 +159,9 @@ def func_5(x):
 def test_case_5():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test 1-1 PyFunc : lambda x : np.ones(x.shape)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1-1 PyFunc : lambda x: np.ones(x.shape)")
@@ -166,7 +178,9 @@ def test_case_5():
 def test_case_6():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test PyFunc Compose : (lambda x : x + x), (lambda x : x + x)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test PyFunc Compose : (lambda x : x + x), (lambda x : x + x)")
@@ -185,7 +199,9 @@ def test_case_6():
 def test_case_7():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test 1-1 PyFunc with python_multiprocessing=True : lambda x : x + x
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test 1-1 PyFunc Multiprocess: lambda x : x + x")
@@ -211,7 +227,9 @@ def test_case_7():
 def test_case_8():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test n-m PyFunc with python_multiprocessing=True : lambda x, y : (x, x + 1, x + y)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test Multiprocess n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
@@ -245,7 +263,9 @@ def test_case_8():
 def test_case_9():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test multiple 1-1 PyFuncs with python_multiprocessing=True : lambda x : x + x
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test multiple 1-1 PyFunc Multiprocess: lambda x : x + x")
@@ -271,7 +291,9 @@ def test_case_9():
 def test_case_10():
     """
-    Test PyFunc
+    Feature: PyFunc in Map op
+    Description: Test multiple Map ops with python_multiprocessing=True : lambda x : x + x
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test multiple map with multiprocess: lambda x : x + x")
@@ -299,7 +321,9 @@ def test_case_10():
 def test_pyfunc_implicit_compose():
     """
-    Test Implicit Compose with pyfunc
+    Feature: PyFunc in Map op
+    Description: Test implicit compose with n-m PyFunc : lambda x, y : (x, x + 1, x + y)
+    Expectation: Output is equal to the expected output
     """
     logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)")
@@ -324,6 +348,11 @@ def test_pyfunc_implicit_compose():
 def test_pyfunc_exception():
+    """
+    Feature: PyFunc in Map op
+    Description: Test PyFunc with an exception raised in the child PyFunc process
+    Expectation: Exception is received and the test ends gracefully
+    """
     logger.info("Test PyFunc Exception Throw: lambda x : raise Exception()")
     # Sometimes there are some ITERATORS left in ITERATORS_LIST when run all UTs together,
@@ -371,6 +400,11 @@ def test_pyfunc_exception_multiprocess():
 def test_func_with_yield_manifest_dataset_01():
+    """
+    Feature: PyFunc in Map op
+    Description: Test PyFunc mapping on a ManifestDataset
+    Expectation: Error is raised as expected
+    """
     def pass_func(_):
         for i in range(10):
             yield (np.array([i]),)
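The 1-1 / n-1 / n-m labels above refer to the input and output column counts of the mapped Python function. A minimal n-1 sketch; dataset values and column names are illustrative.

import mindspore.dataset as ds

data = ds.NumpySlicesDataset(([1, 2], [10, 20]), column_names=["x", "y"])
# n-1 PyFunc: two input columns reduced to a single output column
data = data.map(operations=(lambda x, y: x + y),
                input_columns=["x", "y"], output_columns=["sum"])
for row in data.create_dict_iterator(output_numpy=True):
    print(row["sum"])  # 11, then 22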
View File

@@ -1,4 +1,4 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
+# Copyright 2019-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,6 +37,11 @@ def generate_numpy_random_rgb(shape):
 def test_rgb_hsv_hwc():
+    """
+    Feature: RgbToHsv and HsvToRgb ops
+    Description: Test RgbToHsv and HsvToRgb utilities with an image in HWC format
+    Expectation: Output is equal to the expected output
+    """
     rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32)
     rgb_np = rgb_flat.reshape((8, 8, 3))
     hsv_base = np.array([
@@ -62,6 +67,11 @@ def test_rgb_hsv_hwc():
 def test_rgb_hsv_batch_hwc():
+    """
+    Feature: RgbToHsv and HsvToRgb ops
+    Description: Test RgbToHsv and HsvToRgb utilities with a batch of images in HWC format
+    Expectation: Output is equal to the expected output
+    """
     rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32)
     rgb_np = rgb_flat.reshape((4, 2, 8, 3))
     hsv_base = np.array([
@@ -87,6 +97,11 @@ def test_rgb_hsv_batch_hwc():
 def test_rgb_hsv_chw():
+    """
+    Feature: RgbToHsv and HsvToRgb ops
+    Description: Test RgbToHsv and HsvToRgb utilities with an image in CHW format
+    Expectation: Output is equal to the expected output
+    """
     rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32)
     rgb_np = rgb_flat.reshape((3, 8, 8))
     hsv_base = np.array([
@@ -110,6 +125,11 @@ def test_rgb_hsv_chw():
 def test_rgb_hsv_batch_chw():
+    """
+    Feature: RgbToHsv and HsvToRgb ops
+    Description: Test RgbToHsv and HsvToRgb utilities with a batch of images in CHW format
+    Expectation: Output is equal to the expected output
+    """
     rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32)
     rgb_imgs = rgb_flat.reshape((4, 3, 2, 8))
     hsv_base_imgs = np.array([
@@ -132,6 +152,11 @@ def test_rgb_hsv_batch_chw():
 def test_rgb_hsv_pipeline():
+    """
+    Feature: RgbToHsv and HsvToRgb ops
+    Description: Test RgbToHsv and HsvToRgb ops in a data pipeline
+    Expectation: Output is equal to the expected output
+    """
     # First dataset
     transforms1 = [
         vision.Decode(True),
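These tests check the conversions against precomputed NumPy baselines; colorsys gives an equivalent reference for a single HWC image, and the CHW variants only transpose the layout first. This sketch is a reference computation, not the MindSpore ops themselves.

import colorsys
import numpy as np

rgb_hwc = np.random.rand(8, 8, 3).astype(np.float32)  # HWC image
h, s, v = np.vectorize(colorsys.rgb_to_hsv)(rgb_hwc[..., 0],
                                            rgb_hwc[..., 1],
                                            rgb_hwc[..., 2])
hsv_hwc = np.stack([h, s, v], axis=-1)  # back to HWC

rgb_chw = rgb_hwc.transpose(2, 0, 1)  # same pixels, CHW layout
print(hsv_hwc.shape, rgb_chw.shape)  # (8, 8, 3) (3, 8, 8)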
View File

@@ -25,6 +25,11 @@ from util import dataset_equal
 # via the following lookup table (dict){(83554, 0): 0, (54214, 0): 1, (54214, 1): 2, (65512, 0): 3, (64631, 1): 4}
 def test_sequential_sampler(print_res=False):
+    """
+    Feature: SequentialSampler op
+    Description: Test SequentialSampler op with various num_samples and num_repeats args combinations
+    Expectation: Output is equal to the expected output
+    """
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
@@ -48,6 +53,11 @@ def test_sequential_sampler(print_res=False):
 def test_random_sampler(print_res=False):
+    """
+    Feature: RandomSampler op
+    Description: Test RandomSampler with various replacement, num_samples, and num_repeats args combinations
+    Expectation: Output is equal to the expected output
+    """
     ds.config.set_seed(1234)
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
@@ -72,6 +82,11 @@ def test_random_sampler(print_res=False):
 def test_random_sampler_multi_iter(print_res=False):
+    """
+    Feature: RandomSampler op
+    Description: Test RandomSampler with multiple iterations based on num_repeats
+    Expectation: Output is equal to the expected output
+    """
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
@@ -93,12 +108,22 @@ def test_random_sampler_multi_iter(print_res=False):
 def test_sampler_py_api():
+    """
+    Feature: Sampler op
+    Description: Test add_child op of a Sampler op on another Sampler op
+    Expectation: Runs successfully
+    """
     sampler = ds.SequentialSampler().parse()
     sampler1 = ds.RandomSampler().parse()
     sampler1.add_child(sampler)
 def test_python_sampler():
+    """
+    Feature: Python Sampler op
+    Description: Test Python Sampler op with and without inheritance
+    Expectation: Output is equal to the expected output
+    """
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
@@ -162,6 +187,11 @@ def test_python_sampler():
 def test_sequential_sampler2():
+    """
+    Feature: SequentialSampler op
+    Description: Test SequentialSampler op with various start_index and num_samples args combinations
+    Expectation: Output is equal to the expected output
+    """
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
@@ -188,6 +218,11 @@ def test_sequential_sampler2():
 def test_subset_sampler():
+    """
+    Feature: SubsetSampler op
+    Description: Test SubsetSampler op with various indices and num_samples args combinations, including invalid ones
+    Expectation: Output is equal to the expected output when the input is valid, otherwise an exception is raised
+    """
     def test_config(indices, num_samples=None, exception_msg=None):
         def pipeline():
             sampler = ds.SubsetSampler(indices, num_samples)
@@ -245,6 +280,11 @@ def test_subset_sampler():
 def test_sampler_chain():
+    """
+    Feature: Chained Sampler
+    Description: ManifestDataset with a sampler chain; add SequentialSampler as a child of DistributedSampler
+    Expectation: Correct error is raised as expected
+    """
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
@@ -279,6 +319,12 @@ def test_sampler_chain():
 def test_add_sampler_invalid_input():
+    """
+    Feature: Sampler op
+    Description: Test use_sampler op when the arg is not an instance of a sampler, and
+        a separate case when num_samples and sampler are specified at the same time in the dataset args
+    Expectation: Correct error is raised as expected
+    """
     manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"
     _ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4}
     data1 = ds.ManifestDataset(manifest_file)
@@ -298,12 +344,22 @@ def test_add_sampler_invalid_input():
 def test_distributed_sampler_invalid_offset():
+    """
+    Feature: DistributedSampler op
+    Description: Test DistributedSampler op when the offset is more than num_shards
+    Expectation: Error is raised as expected
+    """
     with pytest.raises(RuntimeError) as info:
         _ = ds.DistributedSampler(num_shards=4, shard_id=0, shuffle=False, num_samples=None, offset=5).parse()
     assert "DistributedSampler: offset must be no more than num_shards(4)" in str(info.value)
 def test_sampler_list():
+    """
+    Feature: Sampler op
+    Description: Test various sampler args (int and not int) in ImageFolderDataset
+    Expectation: Output is equal to the expected output when the sampler elements are ints, otherwise an exception is raised
+    """
     data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=[1, 3, 5])
     data21 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(2).skip(1)
     data22 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(4).skip(3)
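A sketch of the sampler chain used in test_sampler_chain: the child sampler generates indices and the parent samples over them. The shard count and child arguments are illustrative.

import mindspore.dataset as ds

sampler = ds.DistributedSampler(num_shards=2, shard_id=0, shuffle=False)
sampler.add_child(ds.SequentialSampler(start_index=1, num_samples=3))
data = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=sampler)
print(data.get_dataset_size())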
View File

@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,18 +33,33 @@ def slice_compare(array, indexing, expected_array):
 def test_slice_all():
+    """
+    Feature: Slice op
+    Description: Test Slice op for whole array (using None, ellipsis, and boolean for Slice op arg)
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([1, 2, 3, 4, 5], None, [1, 2, 3, 4, 5])
     slice_compare([1, 2, 3, 4, 5], ..., [1, 2, 3, 4, 5])
     slice_compare([1, 2, 3, 4, 5], True, [1, 2, 3, 4, 5])
 def test_slice_single_index():
+    """
+    Feature: Slice op
+    Description: Test Slice op with a single index
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([1, 2, 3, 4, 5], 0, [1])
     slice_compare([1, 2, 3, 4, 5], -3, [3])
     slice_compare([1, 2, 3, 4, 5], [0], [1])
 def test_slice_indices_multidim():
+    """
+    Feature: Slice op
+    Description: Test Slice op using a multi-dimensional arg
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([[1, 2, 3, 4, 5]], [[0], [0]], 1)
     slice_compare([[1, 2, 3, 4, 5]], [[0], [0, 3]], [[1, 4]])
     slice_compare([[1, 2, 3, 4, 5]], [0], [[1, 2, 3, 4, 5]])
@@ -52,6 +67,11 @@ def test_slice_indices_multidim():
 def test_slice_list_index():
+    """
+    Feature: Slice op
+    Description: Test Slice op using a list of indices as the arg
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([1, 2, 3, 4, 5], [0, 1, 4], [1, 2, 5])
     slice_compare([1, 2, 3, 4, 5], [4, 1, 0], [5, 2, 1])
     slice_compare([1, 2, 3, 4, 5], [-1, 1, 0], [5, 2, 1])
@@ -60,12 +80,22 @@ def test_slice_list_index():
 def test_slice_index_and_slice():
+    """
+    Feature: Slice op
+    Description: Test Slice op where the arg is a list containing a Python slice object
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), [4]], [[5]])
     slice_compare([[1, 2, 3, 4, 5]], [[0], slice(0, 2)], [[1, 2]])
     slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [[1], slice(2, 4, 1)], [[7, 8]])
 def test_slice_slice_obj_1s():
+    """
+    Feature: Slice op
+    Description: Test Slice op where the arg consists of slice objects with 1 parameter
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([1, 2, 3, 4, 5], slice(1), [1])
     slice_compare([1, 2, 3, 4, 5], slice(4), [1, 2, 3, 4])
     slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(2), slice(2)], [[1, 2], [5, 6]])
@@ -73,6 +103,11 @@ def test_slice_slice_obj_1s():
 def test_slice_slice_obj_2s():
+    """
+    Feature: Slice op
+    Description: Test Slice op where the arg consists of slice objects with 2 parameters
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([1, 2, 3, 4, 5], slice(0, 2), [1, 2])
     slice_compare([1, 2, 3, 4, 5], slice(2, 4), [3, 4])
     slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2), slice(1, 2)], [[2], [6]])
@@ -80,6 +115,12 @@ def test_slice_slice_obj_2s():
 def test_slice_slice_obj_2s_multidim():
+    """
+    Feature: Slice op
+    Description: Test Slice op on a multi-dimensional array where the multi-dimensional
+        arg consists of slice objects with 2 parameters
+    Expectation: Output is equal to the expected output
+    """
     slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1)], [[1, 2, 3, 4, 5]])
     slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), slice(4)], [[1, 2, 3, 4]])
     slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), slice(0, 3)], [[1, 2, 3]])
@ -89,7 +130,9 @@ def test_slice_slice_obj_2s_multidim():
def test_slice_slice_obj_3s(): def test_slice_slice_obj_3s():
""" """
Test passing in all parameters to the slice objects Feature: Slice op
Description: Test Slice op where the arg consists of slice op with 3 objects
Expectation: Output is equal to the expected output
""" """
slice_compare([1, 2, 3, 4, 5], slice(0, 2, 1), [1, 2]) slice_compare([1, 2, 3, 4, 5], slice(0, 2, 1), [1, 2])
slice_compare([1, 2, 3, 4, 5], slice(0, 4, 1), [1, 2, 3, 4]) slice_compare([1, 2, 3, 4, 5], slice(0, 4, 1), [1, 2, 3, 4])
@ -109,6 +152,11 @@ def test_slice_slice_obj_3s():
def test_slice_obj_3s_double(): def test_slice_obj_3s_double():
"""
Feature: Slice op
Description: Test Slice op where the arg consists of slice op with 3 objects using an array of doubles
Expectation: Output is equal to the expected output
"""
slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 1), [1., 2.]) slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 1), [1., 2.])
slice_compare([1., 2., 3., 4., 5.], slice(0, 4, 1), [1., 2., 3., 4.]) slice_compare([1., 2., 3., 4., 5.], slice(0, 4, 1), [1., 2., 3., 4.])
slice_compare([1., 2., 3., 4., 5.], slice(0, 5, 2), [1., 3., 5.]) slice_compare([1., 2., 3., 4., 5.], slice(0, 5, 2), [1., 3., 5.])
@@ -120,7 +168,9 @@ def test_slice_obj_3s_double():
def test_out_of_bounds_slicing():
    """
    Feature: Slice op
    Description: Test Slice op with slice bounds that fall outside the input
    Expectation: Output is equal to the expected output
    """
    slice_compare([1, 2, 3, 4, 5], slice(-15, -1), [1, 2, 3, 4])
    slice_compare([1, 2, 3, 4, 5], slice(-15, 15), [1, 2, 3, 4, 5])
@@ -129,7 +179,9 @@ def test_out_of_bounds_slicing():
def test_slice_multiple_rows():
    """
    Feature: Slice op
    Description: Test Slice op with multiple rows
    Expectation: Output is equal to the expected output
    """
    dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]]
    exp_dataset = [[], [4, 5], [2], [2, 3, 4]]
@@ -147,7 +199,9 @@ def test_slice_multiple_rows():
def test_slice_none_and_ellipsis():
    """
    Feature: Slice op
    Description: Test Slice op by passing None and Ellipsis as the arg
    Expectation: Output is equal to the expected output
    """
    dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]]
    exp_dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]]
@@ -168,6 +222,11 @@ def test_slice_none_and_ellipsis():
def test_slice_obj_neg():
    """
    Feature: Slice op
    Description: Test Slice op using negative indices in the arg
    Expectation: Output is equal to the expected output
    """
    slice_compare([1, 2, 3, 4, 5], slice(-1, -5, -1), [5, 4, 3, 2])
    slice_compare([1, 2, 3, 4, 5], slice(-1), [1, 2, 3, 4])
    slice_compare([1, 2, 3, 4, 5], slice(-2), [1, 2, 3])
@@ -177,11 +236,21 @@ def test_slice_obj_neg():
def test_slice_all_str():
    """
    Feature: Slice op
    Description: Test Slice op on a whole array of strings (using None and Ellipsis as the arg)
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], None, [b"1", b"2", b"3", b"4", b"5"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], ..., [b"1", b"2", b"3", b"4", b"5"])


def test_slice_single_index_str():
    """
    Feature: Slice op
    Description: Test Slice op with a single index on an array of strings
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1], [b"1", b"2"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1], [b"1", b"2"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], [4], [b"5"])
@@ -190,11 +259,21 @@ def test_slice_single_index_str():
def test_slice_indexes_multidim_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings using a multi-dimensional arg
    Expectation: Output is equal to the expected output
    """
    slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], 0], [[b"1"]])
    slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], [0, 1]], [[b"1", b"2"]])


def test_slice_list_index_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings with a list of indices as the arg
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1, 4], [b"1", b"2", b"5"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], [4, 1, 0], [b"5", b"2", b"1"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], [3, 3, 3], [b"4", b"4", b"4"])
@@ -202,6 +281,11 @@ def test_slice_list_index_str():
# test str index object here
def test_slice_index_and_slice_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings where the arg is a list containing a slice object
    Expectation: Output is equal to the expected output
    """
    slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), 4], [[b"5"]])
    slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], slice(0, 2)], [[b"1", b"2"]])
    slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], [[1], slice(2, 4, 1)],
@@ -209,6 +293,11 @@ def test_slice_index_and_slice_str():
def test_slice_slice_obj_1s_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings where the arg is a slice object with 1 argument
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(1), [b"1"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4), [b"1", b"2", b"3", b"4"])
    slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]],
@@ -217,6 +306,11 @@ def test_slice_slice_obj_1s_str():
def test_slice_slice_obj_2s_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings where the arg is a slice object with 2 arguments
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2), [b"1", b"2"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(2, 4), [b"3", b"4"])
    slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]],
@@ -224,6 +318,12 @@ def test_slice_slice_obj_2s_str():
def test_slice_slice_obj_2s_multidim_str():
    """
    Feature: Slice op
    Description: Test Slice op on a multi-dimensional array of strings where the arg is a list of
        slice objects with 2 arguments
    Expectation: Output is equal to the expected output
    """
    slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1)], [[b"1", b"2", b"3", b"4", b"5"]])
    slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), slice(4)],
                  [[b"1", b"2", b"3", b"4"]])
@@ -236,7 +336,9 @@ def test_slice_slice_obj_2s_multidim_str():
def test_slice_slice_obj_3s_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings where the arg is a slice object with 3 arguments
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2, 1), [b"1", b"2"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 4, 1), [b"1", b"2", b"3", b"4"])
@@ -260,6 +362,11 @@ def test_slice_slice_obj_3s_str():
def test_slice_obj_neg_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings using negative indices in the arg
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, -1), [b"5", b"4", b"3", b"2"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1), [b"1", b"2", b"3", b"4"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-2), [b"1", b"2", b"3"])
@@ -270,7 +377,9 @@ def test_slice_obj_neg_str():
def test_out_of_bounds_slicing_str():
    """
    Feature: Slice op
    Description: Test Slice op on an array of strings with slice bounds that fall outside the input
    Expectation: Output is equal to the expected output
    """
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-15, -1), [b"1", b"2", b"3", b"4"])
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-15, 15), [b"1", b"2", b"3", b"4", b"5"])
@@ -286,7 +395,9 @@ def test_out_of_bounds_slicing_str():
def test_slice_exceptions():
    """
    Feature: Slice op
    Description: Test Slice op with invalid parameters
    Expectation: Correct error is raised as expected
    """
    with pytest.raises(RuntimeError) as info:
        slice_compare([b"1", b"2", b"3", b"4", b"5"], [5], [b"1", b"2", b"3", b"4", b"5"])
@@ -1,4 +1,4 @@
# Copyright 2021-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -31,35 +31,45 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
def test_slice_patches_01(plot=False):
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on RGB image(100, 200) to 4 patches
    Expectation: Output is equal to the expected output
    """
    slice_to_patches([100, 200], 2, 2, True, plot=plot)


def test_slice_patches_02(plot=False):
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on RGB image(100, 200) to 1 patch (no operation being applied)
    Expectation: Output is equal to the expected output
    """
    slice_to_patches([100, 200], 1, 1, True, plot=plot)


def test_slice_patches_03(plot=False):
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on RGB image(99, 199) to 4 patches in pad mode
    Expectation: Output is equal to the expected output
    """
    slice_to_patches([99, 199], 2, 2, True, plot=plot)


def test_slice_patches_04(plot=False):
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on RGB image(99, 199) to 4 patches in drop mode
    Expectation: Output is equal to the expected output
    """
    slice_to_patches([99, 199], 2, 2, False, plot=plot)


def test_slice_patches_05(plot=False):
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on RGB image(99, 199) to 4 patches in pad mode with fill_value=255
    Expectation: Output is equal to the expected output
    """
    slice_to_patches([99, 199], 2, 2, True, 255, plot=plot)
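The slice_to_patches helper these cases call is defined further down the file (its signature appears in the next hunk header). A minimal sketch of the idea, assuming a random RGB input and checking only the patch count; the real helper also compares every patch against a NumPy reference and can plot the result:

import numpy as np
import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.vision.utils as mode


def slice_to_patches_sketch(ori_size, num_h, num_w, pad_or_drop, fill_value=0):
    """Slice one random RGB image into a num_h x num_w grid of patches."""
    image = np.random.randint(0, 255, ori_size + [3]).astype(np.uint8)
    slice_mode = mode.SliceMode.PAD if pad_or_drop else mode.SliceMode.DROP
    op = vision.SlicePatches(num_h, num_w, slice_mode, fill_value)
    patches = op(image)  # eager call; returns one array per patch
    assert len(patches) == num_h * num_w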
@@ -113,7 +123,9 @@ def slice_to_patches(ori_size, num_h, num_w, pad_or_drop, fill_value=0, plot=Fal
def test_slice_patches_exception_01():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op with invalid parameters
    Expectation: Correct error is raised as expected
    """
    logger.info("test_Slice_Patches_exception")
    try:
@@ -141,6 +153,11 @@ def test_slice_patches_exception_01():
        assert "Input fill_value is not within" in str(e)


def test_slice_patches_06():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on a random single-channel image(158, 126, 1) to 16 patches
    Expectation: Output's shape is equal to the expected output's shape
    """
    image = np.random.randint(0, 255, (158, 126, 1)).astype(np.int32)
    slice_patches_op = vision.SlicePatches(2, 8)
    patches = slice_patches_op(image)
@@ -148,6 +165,11 @@ def test_slice_patches_06():
    assert patches[0].shape == (79, 16, 1)


def test_slice_patches_07():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on a random 2-D image(158, 126) to 16 patches
    Expectation: Output's shape is equal to the expected output's shape
    """
    image = np.random.randint(0, 255, (158, 126)).astype(np.int32)
    slice_patches_op = vision.SlicePatches(2, 8)
    patches = slice_patches_op(image)
@@ -155,6 +177,11 @@ def test_slice_patches_07():
    assert patches[0].shape == (79, 16)


def test_slice_patches_08():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on a random image batch(1, 56, 82, 256) to 4 patches
    Expectation: Output's shape is equal to the expected output's shape
    """
    np_data = np.random.randint(0, 255, (1, 56, 82, 256)).astype(np.uint8)
    dataset = ds.NumpySlicesDataset(np_data, column_names=["image"])
    slice_patches_op = vision.SlicePatches(2, 2)
@@ -166,6 +193,11 @@ def test_slice_patches_08():
    assert patch_shape == (28, 41, 256)


def test_slice_patches_09():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on a random image(56, 82, 256) to 12 patches in pad mode
    Expectation: Output's shape is equal to the expected output's shape
    """
    image = np.random.randint(0, 255, (56, 82, 256)).astype(np.uint8)
    slice_patches_op = vision.SlicePatches(4, 3, mode.SliceMode.PAD)
    patches = slice_patches_op(image)
@@ -173,12 +205,22 @@ def test_slice_patches_09():
    assert patches[0].shape == (14, 28, 256)


def skip_test_slice_patches_10():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on a random image(7000, 7000, 255) to 130 patches in drop mode
    Expectation: Output's shape is equal to the expected output's shape
    """
    image = np.random.randint(0, 255, (7000, 7000, 255)).astype(np.uint8)
    slice_patches_op = vision.SlicePatches(10, 13, mode.SliceMode.DROP)
    patches = slice_patches_op(image)
    assert patches[0].shape == (700, 538, 255)


def skip_test_slice_patches_11():
    """
    Feature: SlicePatches op
    Description: Test SlicePatches op on a random image batch(1, 7000, 7000, 256) to 130 patches in drop mode
    Expectation: Output's shape is equal to the expected output's shape
    """
    np_data = np.random.randint(0, 255, (1, 7000, 7000, 256)).astype(np.uint8)
    dataset = ds.NumpySlicesDataset(np_data, column_names=["image"])
    slice_patches_op = vision.SlicePatches(10, 13, mode.SliceMode.DROP)
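The asserted patch shapes follow directly from the slicing mode: with SliceMode.PAD each dimension is divided by the grid count and rounded up (the shortfall is filled with fill_value), while SliceMode.DROP truncates. A quick check of that arithmetic against the shapes asserted above (the rule as stated here is inferred from these asserts, not quoted from the op's documentation):

import math


def patch_shape(height, width, num_h, num_w, pad):
    """Per-patch (h, w): PAD rounds each dimension up, DROP rounds down."""
    if pad:
        return math.ceil(height / num_h), math.ceil(width / num_w)
    return height // num_h, width // num_w


assert patch_shape(158, 126, 2, 8, pad=True) == (79, 16)         # test_slice_patches_06/07
assert patch_shape(56, 82, 4, 3, pad=True) == (14, 28)           # test_slice_patches_09
assert patch_shape(7000, 7000, 10, 13, pad=False) == (700, 538)  # skip_test_slice_patches_10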
@@ -1,4 +1,4 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -74,6 +74,12 @@ def split_with_invalid_inputs(d):
def test_unmappable_invalid_input():
    """
    Feature: Split op
    Description: Test split op on an unmappable dataset (TextFileDataset)
        with various invalid inputs, and applying split op on an already sharded dataset
    Expectation: Correct error is raised as expected
    """
    d = ds.TextFileDataset(text_file_dataset_path)
    split_with_invalid_inputs(d)
@@ -84,6 +90,12 @@ def test_unmappable_invalid_input():
def test_unmappable_split():
    """
    Feature: Split op
    Description: Test split op on an unmappable dataset (TextFileDataset)
        with absolute rows, exact percentages, and fuzzy percentages as input
    Expectation: Output is equal to the expected output
    """
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    d = ds.TextFileDataset(text_file_dataset_path, shuffle=False)
@@ -133,6 +145,11 @@ def test_unmappable_split():
def test_unmappable_randomize_deterministic():
    """
    Feature: Split op
    Description: Test split op on an unmappable dataset (TextFileDataset) with randomization
    Expectation: Output is equal to the expected output
    """
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    # the labels output by ShuffleOp for seed 53 are [0, 2, 1, 4, 3]
@@ -159,6 +176,11 @@ def test_unmappable_randomize_deterministic():
def test_unmappable_randomize_repeatable():
    """
    Feature: Split op
    Description: Test split op on an unmappable dataset (TextFileDataset) with randomization followed by repeat op
    Expectation: Output is equal to the expected output
    """
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    # the labels output by ShuffleOp for seed 53 are [0, 2, 1, 4, 3]
@@ -188,6 +210,11 @@ def test_unmappable_randomize_repeatable():
def test_unmappable_get_dataset_size():
    """
    Feature: Split op
    Description: Test split op on an unmappable dataset (TextFileDataset) followed by get_dataset_size
    Expectation: Output is equal to the expected output
    """
    d = ds.TextFileDataset(text_file_dataset_path, shuffle=False)
    s1, s2 = d.split([0.8, 0.2])
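For orientation: the sizes passed to split can be absolute row counts or percentages. A minimal sketch of the percentage form on a 5-line text file such as the one these tests read ("test.txt" below is a placeholder path, not the tests' text_file_dataset_path):

import mindspore.dataset as ds

# Placeholder path for a 5-line text file.
d = ds.TextFileDataset("test.txt", shuffle=False)
s1, s2 = d.split([0.8, 0.2])  # exact percentages of 5 rows -> 4 rows and 1 row
assert s1.get_dataset_size() == 4
assert s2.get_dataset_size() == 1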
@@ -197,6 +224,12 @@ def test_unmappable_get_dataset_size():
def test_unmappable_multi_split():
    """
    Feature: Split op
    Description: Test split op on an unmappable dataset (TextFileDataset)
        with randomization followed by a deterministic split or another randomized split
    Expectation: Output is equal to the expected output
    """
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    # the labels output by ShuffleOp for seed 53 are [0, 2, 1, 4, 3]
@@ -268,6 +301,12 @@ def test_unmappable_multi_split():
def test_mappable_invalid_input():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset) with invalid inputs, and
        applying split op on an already sharded dataset
    Expectation: Error is raised as expected
    """
    d = ds.ManifestDataset(manifest_file)
    split_with_invalid_inputs(d)
@@ -278,6 +317,12 @@ def test_mappable_invalid_input():
def test_mappable_split_general():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset)
        with absolute rows, exact percentages, and fuzzy percentages
    Expectation: Output is equal to the expected output
    """
    d = ds.ManifestDataset(manifest_file, shuffle=False)
    d = d.take(5)
@@ -286,11 +331,11 @@ def test_mappable_split_general():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1, 2, 3]
    assert s2_output == [4]
@@ -300,11 +345,11 @@ def test_mappable_split_general():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1, 2, 3]
    assert s2_output == [4]
@@ -314,17 +359,23 @@ def test_mappable_split_general():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1]
    assert s2_output == [2, 3, 4]
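Throughout this file, a row is mapped back to its original index by looking up (first image dimension, label) in the manifest_map table presumably defined near the top of the file; the commit also switches those lookups from [] indexing to .get(), which yields None rather than raising KeyError if an unexpected row appears. The values below are hypothetical, just to show the pattern:

# Hypothetical entries mirroring the manifest_map pattern:
# (first image dimension, label) -> original row index.
manifest_map = {(2268, 0): 0, (2268, 1): 1}
item = {"shape0": 2268, "label": 1}
idx = manifest_map.get((item["shape0"], item["label"]))  # None if the key is absent
assert idx == 1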
def test_mappable_split_optimized():
    """
    Feature: Split op
    Description: Test optimized split op on a mappable dataset (ManifestDataset)
        with absolute rows, exact percentages, and fuzzy percentages
    Expectation: Output is equal to the expected output
    """
    d = ds.ManifestDataset(manifest_file, shuffle=False)
    # absolute rows
@@ -332,11 +383,11 @@ def test_mappable_split_optimized():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1, 2, 3]
    assert s2_output == [4]
@@ -346,11 +397,11 @@ def test_mappable_split_optimized():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1, 2, 3]
    assert s2_output == [4]
@@ -360,17 +411,22 @@ def test_mappable_split_optimized():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1]
    assert s2_output == [2, 3, 4]


def test_mappable_randomize_deterministic():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset) with randomization
    Expectation: Output is equal to the expected output
    """
    # the labels output by ManifestDataset for seed 53 are [0, 1, 3, 4, 2]
    ds.config.set_seed(53)
@@ -380,11 +436,11 @@ def test_mappable_randomize_deterministic():
    for _ in range(10):
        s1_output = []
        for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
            s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
        s2_output = []
        for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
            s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
        # note no overlap
        assert s1_output == [0, 1, 3, 4]
@@ -392,6 +448,11 @@ def test_mappable_randomize_deterministic():
def test_mappable_randomize_repeatable():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset) with randomization followed by repeat op
    Expectation: Output is equal to the expected output
    """
    # the labels output by ManifestDataset for seed 53 are [0, 1, 3, 4, 2]
    ds.config.set_seed(53)
@@ -404,11 +465,11 @@ def test_mappable_randomize_repeatable():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    # note no overlap
    assert s1_output == [0, 1, 3, 4] * num_epochs
@@ -416,6 +477,11 @@ def test_mappable_randomize_repeatable():
def test_mappable_sharding():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset), sharding the dataset after the split
    Expectation: Output is equal to the expected output
    """
    # set arbitrary seed for repeatability for shard after split
    # the labels output by ManifestDataset for seed 53 are [0, 1, 3, 4, 2]
    ds.config.set_seed(53)
@@ -443,12 +509,12 @@ def test_mappable_sharding():
    # shard 0
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    # shard 1
    d2s1_output = []
    for item in d2s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        d2s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    rows_per_shard_per_epoch = 2
    assert len(s1_output) == rows_per_shard_per_epoch * num_epochs
@@ -469,17 +535,22 @@ def test_mappable_sharding():
    # test other split
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    d2s2_output = []
    for item in d2s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        d2s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s2_output == [2]
    assert d2s2_output == [2]


def test_mappable_get_dataset_size():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset) followed by get_dataset_size
    Expectation: Output is equal to the expected output
    """
    d = ds.ManifestDataset(manifest_file, shuffle=False)
    s1, s2 = d.split([4, 1])
@@ -489,6 +560,12 @@ def test_mappable_get_dataset_size():
def test_mappable_multi_split():
    """
    Feature: Split op
    Description: Test randomized split op on a mappable dataset (ManifestDataset) followed by
        another split op with and without randomization
    Expectation: Output is equal to the expected output
    """
    # the labels output by ManifestDataset for seed 53 are [0, 1, 3, 4, 2]
    ds.config.set_seed(53)
@@ -499,7 +576,7 @@ def test_mappable_multi_split():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == s1_correct_output
    # no randomize in second split
@@ -507,15 +584,15 @@ def test_mappable_multi_split():
    s1s1_output = []
    for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s1s2_output = []
    for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s1s3_output = []
    for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1s3_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1s1_output == [s1_correct_output[0]]
    assert s1s2_output == [s1_correct_output[1], s1_correct_output[2]]
@@ -523,7 +600,7 @@ def test_mappable_multi_split():
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s2_output == [2]
    # randomize in second split
@@ -534,15 +611,15 @@ def test_mappable_multi_split():
    s1s1_output = []
    for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s1s2_output = []
    for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s1s3_output = []
    for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1s3_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1s1_output == [s1_correct_output[random_sampler_ids[0]]]
    assert s1s2_output == [s1_correct_output[random_sampler_ids[1]], s1_correct_output[random_sampler_ids[2]]]
@@ -550,11 +627,16 @@ def test_mappable_multi_split():
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s2_output == [2]


def test_rounding():
    """
    Feature: Split op
    Description: Test split op on a mappable dataset (ManifestDataset) with under-rounding and
        over-rounding of the split sizes
    Expectation: Output is equal to the expected output
    """
    d = ds.ManifestDataset(manifest_file, shuffle=False)
    # under rounding
@@ -562,11 +644,11 @@ def test_rounding():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0, 1, 2]
    assert s2_output == [3, 4]
@@ -576,15 +658,15 @@ def test_rounding():
    s1_output = []
    for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True):
        s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s2_output = []
    for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True):
        s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    s3_output = []
    for item in s3.create_dict_iterator(num_epochs=1, output_numpy=True):
        s3_output.append(manifest_map.get((item["image"].shape[0], item["label"].item())))
    assert s1_output == [0]
    assert s2_output == [1, 2]
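Whichever direction the fractional sizes are rounded, the shares must still partition the dataset exactly once; a minimal sketch of that invariant on the same 5-row manifest, reusing the file's manifest_file variable:

d = ds.ManifestDataset(manifest_file, shuffle=False)
s1, s2 = d.split([0.5, 0.5])  # 2.5 rows each: one side is rounded up, the other down
assert s1.get_dataset_size() + s2.get_dataset_size() == d.get_dataset_size()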
@@ -1,4 +1,4 @@
# Copyright 2019-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -21,6 +21,11 @@ from mindspore.dataset.text import to_str, to_bytes
def test_basic():
    """
    Feature: Tensor
    Description: Test basic Tensor op on a NumPy array with strings
    Expectation: Output is equal to the expected output
    """
    x = np.array([["ab", "cde", "121"], ["x", "km", "789"]], dtype='S')
    n = cde.Tensor(x)
    arr = n.as_array()
@@ -40,6 +45,11 @@ def compare(strings, dtype='S'):
def test_generator():
    """
    Feature: Tensor
    Description: Test string tensor with various valid inputs using GeneratorDataset
    Expectation: Output is equal to the expected output
    """
    compare(["ab"])
    compare(["", ""])
    compare([""])
@@ -72,6 +82,11 @@ chinese = np.array(["今天天气太好了我们一起去外面玩吧",
def test_batching_strings():
    """
    Feature: Tensor
    Description: Test applying Batch op to a string tensor using GeneratorDataset
    Expectation: Output is equal to the expected output
    """
    def gen():
        for row in chinese:
            yield (np.array(row),)
@@ -84,6 +99,11 @@ def test_batching_strings():
def test_map():
    """
    Feature: Tensor
    Description: Test applying a Map op that splits a string tensor using GeneratorDataset
    Expectation: Output is equal to the expected output
    """
    def gen():
        yield (np.array(["ab cde 121"], dtype='S'),)
@@ -101,6 +121,11 @@ def test_map():
def test_map2():
    """
    Feature: Tensor
    Description: Test applying a Map op that uppercases a string tensor using GeneratorDataset
    Expectation: Output is equal to the expected output
    """
    def gen():
        yield (np.array(["ab cde 121"], dtype='S'),)
@@ -117,6 +142,11 @@ def test_map2():
def test_tfrecord1():
    """
    Feature: Tensor
    Description: Test string tensor using TFRecordDataset with a schema created using the "string" type
    Expectation: Output is equal to the expected output
    """
    s = ds.Schema()
    s.add_column("line", "string", [])
    s.add_column("words", "string", [-1])
@@ -134,6 +164,11 @@ def test_tfrecord1():
def test_tfrecord2():
    """
    Feature: Tensor
    Description: Test string tensor using TFRecordDataset with a schema loaded from a file
    Expectation: Output is equal to the expected output
    """
    data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False,
                              schema='../data/dataset/testTextTFRecord/datasetSchema.json')
    for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
@@ -146,6 +181,11 @@ def test_tfrecord2():
def test_tfrecord3():
    """
    Feature: Tensor
    Description: Test string tensor using TFRecordDataset with a schema created using the mstype.string type
    Expectation: Output is equal to the expected output
    """
    s = ds.Schema()
    s.add_column("line", mstype.string, [])
    s.add_column("words", mstype.string, [-1, 2])
@@ -184,6 +224,11 @@ def create_text_mindrecord():
def test_mindrecord():
    """
    Feature: Tensor
    Description: Test string tensor using MindDataset
    Expectation: Output is equal to the expected output
    """
    data = ds.MindDataset("../data/dataset/testTextMindRecord/test.mindrecord", shuffle=False)
    for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
@@ -228,6 +273,11 @@ def gen_var_cols_2d(num):
def test_batch_padding_01():
    """
    Feature: Batch Padding
    Description: Test batch padding where input_shape=[x] and output_shape=[y] with y > x
    Expectation: Output is equal to the expected output
    """
    data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], b"-2"), "col1d": ([2], b"-1")})
    data1 = data1.repeat(2)
@@ -238,6 +288,12 @@ def test_batch_padding_01():
def test_batch_padding_02():
    """
    Feature: Batch Padding
    Description: Test batch padding that pads in one dimension and truncates in the other, i.e.
        input_shape=[x1, x2] and output_shape=[y1, y2] with y1 > x1 and y2 < x2
    Expectation: Output is equal to the expected output
    """
    data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], "")})
    data1 = data1.repeat(2)
@@ -247,6 +303,11 @@ def test_batch_padding_02():
def test_batch_padding_03():
    """
    Feature: Batch Padding
    Description: Test batch padding using automatic padding for a specific column
    Expectation: Output is equal to the expected output
    """
    data1 = ds.GeneratorDataset((lambda: gen_var_col(4)), ["col"])
    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, "PAD_VALUE")})  # pad automatically
    data1 = data1.repeat(2)
@@ -260,6 +321,11 @@ def test_batch_padding_03():
def test_batch_padding_04():
    """
    Feature: Batch Padding
    Description: Test batch padding using the default setting for all columns
    Expectation: Output is equal to the expected output
    """
    data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"])
    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={})  # pad automatically
    data1 = data1.repeat(2)
@@ -269,6 +335,11 @@ def test_batch_padding_04():
def test_batch_padding_05():
    """
    Feature: Batch Padding
    Description: Test batch padding where None appears in different places of the pad_info shapes
    Expectation: Output is equal to the expected output
    """
    data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"])
    data1 = data1.batch(batch_size=3, drop_remainder=False,
                        pad_info={"col2": ([2, None], "-2"), "col1": (None, "-1")})  # pad automatically
@@ -1,4 +1,4 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -35,7 +35,9 @@ def string_dataset_generator(strings):
def test_to_number_eager():
    """
    Feature: ToNumber op
    Description: Test ToNumber op in eager mode with valid and invalid tensor input
    Expectation: Output is equal to the expected output for a valid tensor; an error is raised otherwise
    """
    input_strings = [["1", "2", "3"], ["4", "5", "6"]]
    op = text.ToNumber(mstype.int8)
@@ -59,6 +61,11 @@ def test_to_number_eager():
def test_to_number_typical_case_integral():
    """
    Feature: ToNumber op
    Description: Test ToNumber op with int data types
    Expectation: Output is equal to the expected output
    """
    input_strings = [["-121", "14"], ["-2219", "7623"], ["-8162536", "162371864"],
                     ["-1726483716", "98921728421"]]
@@ -75,6 +82,11 @@ def test_to_number_typical_case_integral():
def test_to_number_typical_case_non_integral():
    """
    Feature: ToNumber op
    Description: Test ToNumber op with float data types
    Expectation: Output is equal to the expected output
    """
    input_strings = [["-1.1", "1.4"], ["-2219.321", "7623.453"], ["-816256.234282", "162371864.243243"]]
    epsilons = [0.001, 0.001, 0.0001, 0.0001, 0.0000001, 0.0000001]
@@ -105,6 +117,11 @@ def out_of_bounds_error_message_check(dataset, np_type, value_to_cast):
def test_to_number_out_of_bounds_integral():
    """
    Feature: ToNumber op
    Description: Test ToNumber op with values that are out of bounds of the int range
    Expectation: Error is raised as expected
    """
    for np_type, ms_type in zip(np_integral_types, ms_integral_types):
        type_info = np.iinfo(np_type)
        input_strings = [str(type_info.max + 10)]
@@ -119,6 +136,11 @@ def test_to_number_out_of_bounds_integral():
def test_to_number_out_of_bounds_non_integral():
    """
    Feature: ToNumber op
    Description: Test ToNumber op with values that are out of bounds of the float range
    Expectation: Error is raised as expected
    """
    above_range = [str(np.finfo(np.float16).max * 10), str(np.finfo(np.float32).max * 10), "1.8e+308"]
    input_strings = [above_range[0]]
@@ -179,6 +201,11 @@ def test_to_number_out_of_bounds_non_integral():
def test_to_number_boundaries_integral():
    """
    Feature: ToNumber op
    Description: Test ToNumber op with values that are exactly at the boundaries of the int range
    Expectation: Output is equal to the expected output
    """
    for np_type, ms_type in zip(np_integral_types, ms_integral_types):
        type_info = np.iinfo(np_type)
        input_strings = [str(type_info.max)]
@@ -201,6 +228,11 @@ def test_to_number_boundaries_integral():
def test_to_number_invalid_input():
    """
    Feature: ToNumber op
    Description: Test ToNumber op with an invalid input string
    Expectation: Error is raised as expected
    """
    input_strings = ["a8fa9ds8fa"]
    dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings")
    dataset = dataset.map(operations=text.ToNumber(mstype.int32), input_columns=["strings"])
@@ -212,6 +244,11 @@ def test_to_number_invalid_input():
def test_to_number_invalid_type():
    """
    Feature: ToNumber op
    Description: Test ToNumber op mapping to an invalid data type
    Expectation: Error is raised as expected
    """
    with pytest.raises(TypeError) as info:
        dataset = ds.GeneratorDataset(string_dataset_generator(["a8fa9ds8fa"]), "strings")
        dataset = dataset.map(operations=text.ToNumber(mstype.bool_), input_columns=["strings"])
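For a concrete feel of the op: an eager call converts a string tensor element-wise into the requested numeric type, and values exactly at a type's limits survive the cast. A small sketch, assuming the same eager call pattern as test_to_number_eager (the int8 bounds themselves are a fact of the type):

import numpy as np
import mindspore.dataset.text as text
import mindspore.common.dtype as mstype

op = text.ToNumber(mstype.int8)
out = op(np.array(["-128", "127"]))  # exactly the int8 boundaries
np.testing.assert_array_equal(out, np.array([-128, 127], dtype=np.int8))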
@@ -1,4 +1,4 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -45,9 +45,9 @@ def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
def test_vad_pipeline1(): def test_vad_pipeline1():
""" """
Feature: Vad Feature: Vad op
Description: test Vad cpp op in pipeline Description: Test Vad op in pipeline
Expectation: equal results from Mindspore and benchmark Expectation: Equal results from Mindspore and benchmark
""" """
# <1000> # <1000>
dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :], dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :],
@ -75,9 +75,9 @@ def test_vad_pipeline1():
def test_vad_pipeline2(): def test_vad_pipeline2():
""" """
Feature: Vad Feature: Vad op
Description: test Vad cpp op in pipeline Description: Test Vad op in pipeline
Expectation: equal results from Mindspore and benchmark Expectation: Equal results from Mindspore and benchmark
""" """
# <1, 1000> trigger level and time # <1, 1000> trigger level and time
dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy") dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy")
@ -130,9 +130,9 @@ def test_vad_pipeline2():
def test_vad_pipeline3(): def test_vad_pipeline3():
""" """
Feature: Vad Feature: Vad op
Description: test Vad cpp op in pipeline Description: Test Vad op in pipeline
Expectation: equal results from Mindspore and benchmark Expectation: Equal results from Mindspore and benchmark
""" """
# <1, 1000> noise # <1, 1000> noise
dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy") dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy")
@ -200,9 +200,9 @@ def test_vad_pipeline3():
def test_vad_pipeline_invalid_param1(): def test_vad_pipeline_invalid_param1():
""" """
Feature: Vad Feature: Vad op
Description: test Vad with invalid input parameters Description: Test Vad with invalid input parameters
Expectation: throw ValueError or TypeError Expectation: Throw ValueError or TypeError
""" """
logger.info("test InverseMelScale op with default values") logger.info("test InverseMelScale op with default values")
in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :] in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :]
@ -243,9 +243,9 @@ def test_vad_pipeline_invalid_param1():
def test_vad_pipeline_invalid_param2(): def test_vad_pipeline_invalid_param2():
""" """
Feature: Vad Feature: Vad op
Description: test Vad with invalid input parameters Description: Test Vad with invalid input parameters
Expectation: throw ValueError or TypeError Expectation: Throw ValueError or TypeError
""" """
logger.info("test InverseMelScale op with default values") logger.info("test InverseMelScale op with default values")
in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :] in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :]
@ -283,9 +283,9 @@ def test_vad_pipeline_invalid_param2():
def test_vad_pipeline_invalid_param3(): def test_vad_pipeline_invalid_param3():
""" """
Feature: Vad Feature: Vad op
Description: test Vad with invalid input parameters Description: Test Vad with invalid input parameters
Expectation: throw ValueError or TypeError Expectation: Throw ValueError or TypeError
""" """
logger.info("test InverseMelScale op with default values") logger.info("test InverseMelScale op with default values")
in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :] in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :]
@ -343,9 +343,9 @@ def test_vad_pipeline_invalid_param3():
def test_vad_eager(): def test_vad_eager():
""" """
Feature: Vad Feature: Vad op
Description: test Vad cpp op with eager mode Description: Test Vad op with eager mode
Expectation: equal results from Mindspore and benchmark Expectation: Equal results from Mindspore and benchmark
""" """
spectrogram = np.load(DATA_DIR + "single_channel.npy") spectrogram = np.load(DATA_DIR + "single_channel.npy")
out_ms = c_audio.Vad(sample_rate=600)(spectrogram) out_ms = c_audio.Vad(sample_rate=600)(spectrogram)
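For reference, a rough eager-mode sketch of the call above; the import alias and the random input are assumptions, since the diff only shows c_audio.Vad(sample_rate=600):

import numpy as np
import mindspore.dataset.audio.transforms as c_audio  # assumed import path for the audio transforms

# hypothetical input: one second of single-channel audio at 600 Hz
waveform = np.random.randn(600).astype(np.float32)
# Vad trims silence from the front of the waveform; sample_rate must match the input
out_ms = c_audio.Vad(sample_rate=600)(waveform)
print(out_ms.shape)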
View File
@@ -19,6 +19,13 @@ from mindspore import log as logger
 def test_batch_corner_cases():
+    """
+    Feature: Batch op
+    Description: Test Batch op corner cases:
+        - batch_size greater than the entire epoch, with both values of drop_remainder
+        - Batch op applied before Repeat op, with different drop_remainder settings
+    Expectation: Output is equal to the expected output
+    """
     def gen(num):
         for i in range(num):
             yield (np.array([i]),)
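For reference, a minimal sketch of the first corner case (batch_size larger than the epoch), with illustrative numbers:

import numpy as np
import mindspore.dataset as ds

def gen(num):
    for i in range(num):
        yield (np.array([i]),)

# 7 rows with batch_size=10: drop_remainder=True drops the partial batch,
# leaving an empty epoch; drop_remainder=False keeps one batch of 7 rows
data = ds.GeneratorDataset(lambda: gen(7), ["col"])
data = data.batch(batch_size=10, drop_remainder=False)
print(sum(1 for _ in data.create_tuple_iterator(num_epochs=1)))  # 1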
@@ -192,6 +199,11 @@ def test_get_batchsize_on_callable_batchsize():
 def test_basic_batch_map():
+    """
+    Feature: Batch op
+    Description: Test basic Batch op usage with per_batch_map
+    Expectation: Output is equal to the expected output
+    """
     def check_res(arr1, arr2):
         for ind, _ in enumerate(arr1):
             if not np.array_equal(arr1[ind], np.array(arr2[ind])):
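For reference, a sketch of the per_batch_map contract this test relies on: each input column arrives as a list of row arrays, followed by a BatchInfo object, and the function returns the transformed columns as a tuple of lists. Names here are illustrative:

import numpy as np
import mindspore.dataset as ds

def gen(num):
    for i in range(num):
        yield (np.array([i]),)

def invert_sign(col, batch_info):
    # negate every row in the batch; the return value must be a tuple of lists
    return ([np.copy(-arr) for arr in col],)

data = ds.GeneratorDataset(lambda: gen(4), ["num"])
data = data.batch(batch_size=2, input_columns=["num"], per_batch_map=invert_sign)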
@@ -225,6 +237,11 @@ def test_basic_batch_map():
 def test_batch_multi_col_map():
+    """
+    Feature: Batch op
+    Description: Test Batch op with multiple input columns and per_batch_map
+    Expectation: Output is equal to the expected output
+    """
     def check_res(arr1, arr2):
         for ind, _ in enumerate(arr1):
             if not np.array_equal(arr1[ind], np.array(arr2[ind])):
@@ -274,6 +291,11 @@ def test_batch_multi_col_map():
 def test_var_batch_multi_col_map():
+    """
+    Feature: Batch op
+    Description: Test Batch op with a callable batch_size, multiple input columns, and per_batch_map
+    Expectation: Output is equal to the expected output
+    """
     def check_res(arr1, arr2):
         for ind, _ in enumerate(arr1):
             if not np.array_equal(arr1[ind], np.array(arr2[ind])):
@@ -314,6 +336,11 @@ def test_var_batch_multi_col_map():
 def test_var_batch_var_resize():
+    """
+    Feature: Batch op
+    Description: Test Batch op with a callable batch_size and resize as per_batch_map
+    Expectation: Output is equal to the expected output
+    """
     # fake resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25)
     def np_psedo_resize(col, batchInfo):
         s = (batchInfo.get_batch_num() + 1) ** 2
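For reference, a sketch of the callable batch_size feature these tests build on; the callable receives a BatchInfo and returns the size of the upcoming batch (names are illustrative):

import numpy as np
import mindspore.dataset as ds

def gen(num):
    for i in range(num):
        yield (np.array([i]),)

def add_one(batch_info):
    # batch numbers are 0-based, so batches have 1, 2, 3, ... rows
    return batch_info.get_batch_num() + 1

data = ds.GeneratorDataset(lambda: gen(6), ["num"])
data = data.batch(batch_size=add_one)  # yields batches of sizes 1, 2, 3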
@@ -332,6 +359,11 @@ def test_var_batch_var_resize():
 def test_exception():
+    """
+    Feature: Batch op
+    Description: Test Batch op with an invalid batch size and an invalid map function
+    Expectation: Error is raised as expected
+    """
     def gen(num):
         for i in range(num):
             yield (np.array([i]),)
@@ -362,6 +394,11 @@ def test_exception():
 def test_multi_col_map():
+    """
+    Feature: Batch op
+    Description: Test Batch op on multiple columns with various per_batch_map arguments, both valid and invalid
+    Expectation: Output is equal to the expected output for valid input; an error is raised otherwise
+    """
     def gen_2_cols(num):
         for i in range(1, 1 + num):
             yield (np.array([i]), np.array([i ** 2]))
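For reference, a sketch of deriving an extra output column with per_batch_map, one of the valid patterns this test covers; the helper name is illustrative and gen_2_cols is the generator shown above:

import numpy as np
import mindspore.dataset as ds

def gen_2_cols(num):
    for i in range(1, 1 + num):
        yield (np.array([i]), np.array([i ** 2]))

def mult_cols(col1, col2, batch_info):
    # pass the two input columns through and append their elementwise product
    return (col1, col2, [c1 * c2 for c1, c2 in zip(col1, col2)])

data = ds.GeneratorDataset(lambda: gen_2_cols(3), ["col1", "col2"])
data = data.batch(batch_size=3, input_columns=["col1", "col2"],
                  output_columns=["col1", "col2", "col3"], per_batch_map=mult_cols)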
@@ -427,6 +464,11 @@ def test_multi_col_map():
 def test_exceptions_2():
+    """
+    Feature: Batch op
+    Description: Test Batch op with an invalid column name and an invalid per_batch_map function argument
+    Expectation: Error is raised as expected
+    """
     def gen(num):
         for i in range(num):
             yield (np.array([i]),)
View File
@@ -123,7 +123,9 @@ def test_vocab_exception():
 def test_lookup_callable():
     """
-    Test lookup is callable
+    Feature: Python text.Vocab class
+    Description: Test Lookup op with a text.Vocab instance as the argument
+    Expectation: Output is equal to the expected output
     """
     logger.info("test_lookup_callable")
     vocab = text.Vocab.from_list(['', '', '', '', ''])
@@ -133,6 +135,11 @@ def test_lookup_callable():
 def test_from_list_tutorial():
+    """
+    Feature: Python text.Vocab class
+    Description: Test the from_list() method of text.Vocab following the basic usage tutorial
+    Expectation: Output is equal to the expected output
+    """
     vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["<pad>", "<unk>"], True)
     lookup = text.Lookup(vocab, "<unk>")
     data = ds.TextFileDataset(DATA_FILE, shuffle=False)
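For reference, a condensed sketch of the from_list() flow covered by the tutorial test; the eager Lookup calls mirror test_lookup_callable above, and the sample words are illustrative:

import mindspore.dataset.text as text

# special_first=True places the special tokens at ids 0 and 1
vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "),
                             ["<pad>", "<unk>"], True)
lookup = text.Lookup(vocab, "<unk>")
print(lookup("world"))   # id assigned to "world"
print(lookup("unseen"))  # falls back to the id of "<unk>"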
@@ -145,6 +152,11 @@ def test_from_list_tutorial():
 def test_from_file_tutorial():
+    """
+    Feature: Python text.Vocab class
+    Description: Test the from_file() method of text.Vocab following the basic usage tutorial
+    Expectation: Output is equal to the expected output
+    """
     vocab = text.Vocab.from_file(VOCAB_FILE, ",", None, ["<pad>", "<unk>"], True)
     lookup = text.Lookup(vocab)
     data = ds.TextFileDataset(DATA_FILE, shuffle=False)
@@ -157,6 +169,11 @@ def test_from_file_tutorial():
 def test_from_dict_tutorial():
+    """
+    Feature: Python text.Vocab class
+    Description: Test the from_dict() method of text.Vocab following the basic usage tutorial
+    Expectation: Output is equal to the expected output
+    """
     vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
     lookup = text.Lookup(vocab, "<unk>")  # any unknown token will be mapped to the id of <unk>
     data = ds.TextFileDataset(DATA_FILE, shuffle=False)
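For reference, a sketch of the from_dict() behavior under test: ids are taken verbatim from the dict, and unknown tokens fall back to the id of the designated unknown_token:

import mindspore.dataset.text as text

vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "<unk>": 6})
lookup = text.Lookup(vocab, "<unk>")
print(lookup("behind"))  # 2, as assigned in the dict
print(lookup("ahead"))   # 6, the id of "<unk>"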
@@ -169,6 +186,11 @@ def test_from_dict_tutorial():
 def test_from_dict_exception():
+    """
+    Feature: Python text.Vocab class
+    Description: Test the from_dict() method of text.Vocab with invalid input
+    Expectation: Error is raised as expected
+    """
     try:
         vocab = text.Vocab.from_dict({"home": -1, "behind": 0})
         if not vocab:
@@ -178,6 +200,11 @@ def test_from_dict_exception():
 def test_from_list():
+    """
+    Feature: Python text.Vocab class
+    Description: Test the from_list() method of text.Vocab with various valid inputs and one invalid input
+    Expectation: Output is equal to the expected output for valid cases; an error is raised otherwise
+    """
     def gen(texts):
         for word in texts.split(" "):
             yield (np.array(word, dtype='S'),)
@@ -216,6 +243,11 @@ def test_from_list():
 def test_from_list_lookup_empty_string():
+    """
+    Feature: Python text.Vocab class
+    Description: Test from_list() with and without an empty string in the Lookup op when unknown_token=None
+    Expectation: Output is equal to the expected output when "" is in the vocab; an error is raised otherwise
+    """
     # "" is a valid word in vocab, which can be looked up by LookupOp
     vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["<pad>", ""], True)
     lookup = text.Lookup(vocab, "")
@@ -241,6 +273,11 @@ def test_from_list_lookup_empty_string():
 def test_from_file():
+    """
+    Feature: Python text.Vocab class
+    Description: Test the from_file() method of text.Vocab with valid and invalid special_tokens and vocab_size
+    Expectation: Output is equal to the expected output for valid parameters; an error is raised otherwise
+    """
     def gen(texts):
         for word in texts.split(" "):
             yield (np.array(word, dtype='S'),)
@@ -272,6 +309,11 @@ def test_from_file():
 def test_lookup_cast_type():
+    """
+    Feature: Python text.Vocab class
+    Description: Test Lookup op output type casting with valid and invalid data types
+    Expectation: Output is equal to the expected output for valid data types; an error is raised otherwise
+    """
     def gen(texts):
         for word in texts.split(" "):
             yield (np.array(word, dtype='S'),)