From 229603b4b55556f15aa6f86a9747611323e7be58 Mon Sep 17 00:00:00 2001 From: David Date: Thu, 2 Jun 2022 21:24:09 -0400 Subject: [PATCH] Added docs to ut part 4 --- .jenkins/check/config/filter_pylint.txt | 1 + tests/ut/python/dataset/test_callbacks.py | 62 +++- .../ut/python/dataset/test_concatenate_op.py | 59 +++- tests/ut/python/dataset/test_create_dct.py | 14 +- tests/ut/python/dataset/test_epoch_ctrl.py | 123 ++++++-- tests/ut/python/dataset/test_fill_op.py | 32 +- tests/ut/python/dataset/test_filterop.py | 135 +++++++-- tests/ut/python/dataset/test_minddataset.py | 277 ++++++++++++++---- .../dataset/test_minddataset_exception.py | 65 +++- .../python/dataset/test_minddataset_padded.py | 80 ++++- .../dataset/test_minddataset_sampler.py | 149 +++++++++- tests/ut/python/dataset/test_nlp.py | 12 +- tests/ut/python/dataset/test_noop_mode.py | 12 +- tests/ut/python/dataset/test_opt_pass.py | 19 +- tests/ut/python/dataset/test_pad_batch.py | 26 ++ tests/ut/python/dataset/test_pad_end_op.py | 17 +- tests/ut/python/dataset/test_paddeddataset.py | 114 ++++++- tests/ut/python/dataset/test_pair_truncate.py | 27 +- tests/ut/python/dataset/test_profiling.py | 72 +++-- .../dataset/test_profiling_startstop.py | 29 +- tests/ut/python/dataset/test_project.py | 54 +++- tests/ut/python/dataset/test_pyfunc.py | 58 +++- tests/ut/python/dataset/test_rgb_hsv.py | 27 +- tests/ut/python/dataset/test_sampler.py | 56 ++++ tests/ut/python/dataset/test_slice_op.py | 127 +++++++- tests/ut/python/dataset/test_slice_patches.py | 56 +++- tests/ut/python/dataset/test_split.py | 152 +++++++--- tests/ut/python/dataset/test_tensor_string.py | 73 ++++- tests/ut/python/dataset/test_to_number_op.py | 41 ++- tests/ut/python/dataset/test_vad.py | 44 +-- tests/ut/python/dataset/test_var_batch_map.py | 42 +++ tests/ut/python/dataset/test_vocab.py | 44 ++- 32 files changed, 1808 insertions(+), 291 deletions(-) diff --git a/.jenkins/check/config/filter_pylint.txt b/.jenkins/check/config/filter_pylint.txt 
index 002faa0240e..3339346c5d6 100644 --- a/.jenkins/check/config/filter_pylint.txt +++ b/.jenkins/check/config/filter_pylint.txt @@ -92,6 +92,7 @@ "mindspore/tests/ut/python/dataset/test_batch.py" "broad-except" "mindspore/tests/ut/python/dataset/test_config.py" "broad-except" "mindspore/tests/ut/python/dataset/test_minddataset.py" "redefined-outer-name" +"mindspore/tests/ut/python/dataset/test_minddataset.py" "unused-variable" "mindspore/tests/ut/python/dataset/test_minddataset_sampler.py" "redefined-outer-name" "mindspore/tests/ut/python/dataset/test_serdes_dataset.py" "redefined-outer-name" "mindspore/tests/ut/python/dataset/test_serdes_dataset.py" "unused-import" diff --git a/tests/ut/python/dataset/test_callbacks.py b/tests/ut/python/dataset/test_callbacks.py index 869527e820f..7487abd56be 100644 --- a/tests/ut/python/dataset/test_callbacks.py +++ b/tests/ut/python/dataset/test_callbacks.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -223,6 +223,11 @@ def build_test_case_2maps(epochs, steps): def test_callbacks_all_methods(): + """ + Feature: Callback + Description: Test Map op with 1 callback with various num_epochs and num_steps args combinations + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_all_methods") build_test_case_1cb(1, 1) @@ -242,6 +247,11 @@ def test_callbacks_all_methods(): def test_callbacks_var_step_size(): + """ + Feature: Callback + Description: Test Map op with 1 callback with step_size=2 and various num_epochs and num_steps args combinations + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_var_step_size") build_test_case_1cb(1, 2, 2) @@ -258,6 +268,11 @@ def test_callbacks_var_step_size(): def test_callbacks_all_2cbs(): + """ + Feature: Callback + Description: Test Map op with 2 callbacks with various num_epochs and num_steps args combinations + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_all_2cbs") build_test_case_2cbs(4, 1) @@ -301,6 +316,11 @@ class Net(nn.Cell): def test_callbacks_non_sink(): + """ + Feature: Callback + Description: Test callbacks with dataset_sink_mode=False in train + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_non_sink") events = [] @@ -325,6 +345,11 @@ def test_callbacks_non_sink(): def test_callbacks_non_sink_batch_size2(): + """ + Feature: Callback + Description: Test callbacks with dataset_sink_mode=False in train after batch(2) is applied to the dataset + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_non_sink_batch_size2") events = [] @@ -349,6 +374,11 @@ def test_callbacks_non_sink_batch_size2(): def test_callbacks_non_sink_mismatch_size(): + """ + Feature: Callback + Description: Test callbacks with dataset_sink_mode=False in train with mismatch size + Expectation: Exception is raised as expected + """ 
logger.info("test_callbacks_non_sink_mismatch_size") default_timeout = ds.config.get_callback_timeout() ds.config.set_callback_timeout(1) @@ -370,6 +400,11 @@ def test_callbacks_non_sink_mismatch_size(): def test_callbacks_validations(): + """ + Feature: Callback + Description: Test callbacks param in Map op with invalid argument + Expectation: Exception is raised as expected + """ logger.info("test_callbacks_validations") with pytest.raises(Exception) as err: @@ -397,6 +432,11 @@ def test_callbacks_validations(): def test_callbacks_sink_simulation(): + """ + Feature: Callback + Description: Test callbacks under sink simulation + Expectation: Output is equal to the expected output + """ logger.info("test_callback_sink_simulation") events = [] @@ -424,6 +464,11 @@ def test_callbacks_sink_simulation(): def test_callbacks_repeat(): + """ + Feature: Callback + Description: Test Map op with 1 callback with various num_epochs, num_steps, step_size, and repeat args combinations + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_repeat") build_test_case_1cb(epochs=2, steps=2, step_size=1, repeat=2) @@ -453,6 +498,11 @@ def test_callbacks_exceptions(): def test_callbacks_train_end(): + """ + Feature: Callback + Description: Test callback end op under sink simulation + Expectation: Runs successfully + """ logger.info("test_callback_sink_simulation") # No asserts are needed, just test there is no deadlock or exceptions events = [] @@ -469,6 +519,11 @@ def test_callbacks_train_end(): def test_callbacks_one_cb(): + """ + Feature: Callback + Description: Test callbacks with Begin, EpochBegin, EpochEnd, StepBegin, and StepEnd as the args + Expectation: Output is equal to the expected output + """ logger.info("test_callbacks_one_cb") data = ds.NumpySlicesDataset([1, 2, 3, 4], shuffle=False) @@ -510,6 +565,11 @@ def test_callbacks_one_cb(): def test_clear_callback(): + """ + Feature: Callback + Description: Test callback is removed for 
get_dataset_size and output_shape/type + Expectation: Output is equal to the expected output + """ logger.info("test_clear_callback") # this test case will test that callback is removed for get_dataset_size and output_shape/type diff --git a/tests/ut/python/dataset/test_concatenate_op.py b/tests/ut/python/dataset/test_concatenate_op.py index 5317e35da3f..b3f54e516d1 100644 --- a/tests/ut/python/dataset/test_concatenate_op.py +++ b/tests/ut/python/dataset/test_concatenate_op.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,11 @@ import mindspore.dataset.transforms as data_trans def test_concatenate_op_all(): + """ + Feature: Concatenate op + Description: Test Concatenate op with all input parameters provided + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array([5., 6., 7., 8.], dtype=np.float),) @@ -39,6 +44,11 @@ def test_concatenate_op_all(): def test_concatenate_op_none(): + """ + Feature: Concatenate op + Description: Test Concatenate op with none of the input parameters provided + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array([5., 6., 7., 8.], dtype=np.float),) @@ -51,6 +61,11 @@ def test_concatenate_op_none(): def test_concatenate_op_string(): + """ + Feature: Concatenate op + Description: Test Concatenate op on array of strings + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array(["ss", "ad"], dtype='S'),) @@ -66,6 +81,11 @@ def test_concatenate_op_string(): def test_concatenate_op_multi_input_string(): + """ + Feature: Concatenate op + Description: Test Concatenate op on multi dimension array of strings + Expectation: Output is equal to the expected output + """ prepend_tensor = np.array(["dw", "df"], dtype='S') append_tensor = 
np.array(["dwsdf", "df"], dtype='S') @@ -82,6 +102,11 @@ def test_concatenate_op_multi_input_string(): def test_concatenate_op_multi_input_numeric(): + """ + Feature: Concatenate op + Description: Test Concatenate op on multi dimension array of ints + Expectation: Output is equal to the expected output + """ prepend_tensor = np.array([3, 5]) data = ([[1, 2]], [[3, 4]]) @@ -97,6 +122,12 @@ def test_concatenate_op_multi_input_numeric(): def test_concatenate_op_type_mismatch(): + """ + Feature: Concatenate op + Description: Test Concatenate op where the data type of the original array dataset (float) has a mismatch + data type with tensor that will be concatenated (string) + Expectation: Error is raised as expected + """ def gen(): yield (np.array([3, 4], dtype=np.float),) @@ -112,6 +143,12 @@ def test_concatenate_op_type_mismatch(): def test_concatenate_op_type_mismatch2(): + """ + Feature: Concatenate op + Description: Test Concatenate op where the data type of the original array dataset (string) has a mismatch + data type with tensor that will be concatenated (float) + Expectation: Error is raised as expected + """ def gen(): yield (np.array(["ss", "ad"], dtype='S'),) @@ -127,6 +164,11 @@ def test_concatenate_op_type_mismatch2(): def test_concatenate_op_incorrect_dim(): + """ + Feature: Concatenate op + Description: Test Concatenate op using original dataset with incorrect dimension + Expectation: Error is raised as expected + """ def gen(): yield (np.array([["ss", "ad"], ["ss", "ad"]], dtype='S'),) @@ -142,12 +184,22 @@ def test_concatenate_op_incorrect_dim(): def test_concatenate_op_wrong_axis(): + """ + Feature: Concatenate op + Description: Test Concatenate op using wrong axis argument + Expectation: Error is raised as expected + """ with pytest.raises(ValueError) as error_info: data_trans.Concatenate(2) assert "only 1D concatenation supported." 
in str(error_info.value) def test_concatenate_op_negative_axis(): + """ + Feature: Concatenate op + Description: Test Concatenate op using negative axis argument + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array([5., 6., 7., 8.], dtype=np.float),) @@ -163,6 +215,11 @@ def test_concatenate_op_negative_axis(): def test_concatenate_op_incorrect_input_dim(): + """ + Feature: Concatenate op + Description: Test Concatenate op using array that we would like to concatenate with incorrect dimensions + Expectation: Error is raised as expected + """ prepend_tensor = np.array([["ss", "ad"], ["ss", "ad"]], dtype='S') with pytest.raises(ValueError) as error_info: diff --git a/tests/ut/python/dataset/test_create_dct.py b/tests/ut/python/dataset/test_create_dct.py index f136daad930..e6ffa39e2dc 100644 --- a/tests/ut/python/dataset/test_create_dct.py +++ b/tests/ut/python/dataset/test_create_dct.py @@ -32,8 +32,8 @@ def count_unequal_element(data_expected, data_me, rtol, atol): def test_create_dct_none(): """ - Feature: CreateDct - Description: Test CreateDct in eager mode + Feature: Create DCT transformation + Description: Test create_dct in eager mode with no normalization Expectation: The returned result is as expected """ expect = np.array([[2.00000000, 1.84775901], @@ -46,8 +46,8 @@ def test_create_dct_none(): def test_create_dct_ortho(): """ - Feature: CreateDct - Description: Test CreateDct in eager mode + Feature: Create DCT transformation + Description: Test create_dct in eager mode with orthogonal normalization Expectation: The returned result is as expected """ output = create_dct(1, 3, NormMode.ORTHO) @@ -59,9 +59,9 @@ def test_create_dct_ortho(): def test_createdct_invalid_input(): """ - Feature: CreateDct - Description: Error detection - Expectation: Return error + Feature: Create DCT transformation + Description: Test create_dct with invalid inputs + Expectation: Error is raised as expected """ def test_invalid_input(test_name, 
n_mfcc, n_mels, norm, error, error_msg): logger.info("Test CreateDct with bad input: {0}".format(test_name)) diff --git a/tests/ut/python/dataset/test_epoch_ctrl.py b/tests/ut/python/dataset/test_epoch_ctrl.py index 710b697c844..90186be2908 100644 --- a/tests/ut/python/dataset/test_epoch_ctrl.py +++ b/tests/ut/python/dataset/test_epoch_ctrl.py @@ -1,4 +1,4 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -38,7 +38,9 @@ def diff_mse(in1, in2): def test_cifar10(): """ - dataset parameter + Feature: Epoch Control op + Description: Test num_epochs as tuple iterator param for Cifar10Dataset + Expectation: Output is equal to the expected output """ logger.info("Test dataset parameter") data_dir_10 = "../data/dataset/testCifar10Data" @@ -70,7 +72,9 @@ def test_cifar10(): def test_decode_op(): """ - Test Decode op + Feature: Epoch Control op + Description: Test num_epochs as dict iterator param for dataset which Decode op has been applied onto it + Expectation: Output is equal to the expected output before iterator is stopped, then correct error is raised """ logger.info("test_decode_op") @@ -125,7 +129,9 @@ def generator_1d(): def test_generator_dict_0(): """ - test generator dict 0 + Feature: Epoch Control op + Description: Test dict iterator inside the loop declaration for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -142,7 +148,9 @@ def test_generator_dict_0(): def test_generator_dict_1(): """ - test generator dict 1 + Feature: Epoch Control op + Description: Test dict iterator outside the epoch for loop for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -162,7 +170,9 @@ def test_generator_dict_1(): def 
test_generator_dict_2(): """ - test generator dict 2 + Feature: Epoch Control op + Description: Test dict iterator with num_epochs=-1 for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output and iterator never shutdown """ logger.info("Test 1D Generator : 0 - 63") @@ -187,7 +197,9 @@ def test_generator_dict_2(): def test_generator_dict_3(): """ - test generator dict 3 + Feature: Epoch Control op + Description: Test dict iterator with num_epochs=-1 followed by stop for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output before stop, then error is raised """ logger.info("Test 1D Generator : 0 - 63") @@ -213,7 +225,10 @@ def test_generator_dict_3(): def test_generator_dict_4(): """ - test generator dict 4 + Feature: Epoch Control op + Description: Test dict iterator by fetching data beyond the specified number of epochs for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -236,7 +251,11 @@ def test_generator_dict_4(): def test_generator_dict_4_1(): """ - test generator dict 4_1 + Feature: Epoch Control op + Description: Test dict iterator by fetching data beyond the specified number of epochs where num_epochs=1 so + Epoch Control op will not be injected, using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -260,7 +279,11 @@ def test_generator_dict_4_1(): def test_generator_dict_4_2(): """ - test generator dict 4_2 + Feature: Epoch Control op + Description: Test dict iterator by fetching data beyond the specified number of epochs where num_epochs=1 so + Epoch Control op will not be injected, after repeat op with num_repeat=1, using 1D GeneratorDataset 0-63 
+ Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -286,7 +309,11 @@ def test_generator_dict_4_2(): def test_generator_dict_5(): """ - test generator dict 5 + Feature: Epoch Control op + Description: Test dict iterator by fetching data below (2 loops) then + beyond the specified number of epochs using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -320,7 +347,9 @@ def test_generator_dict_5(): def test_generator_tuple_0(): """ - test generator tuple 0 + Feature: Epoch Control op + Description: Test tuple iterator inside the loop declaration for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -337,7 +366,9 @@ def test_generator_tuple_0(): def test_generator_tuple_1(): """ - test generator tuple 1 + Feature: Epoch Control op + Description: Test tuple iterator outside the epoch for loop for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -357,7 +388,9 @@ def test_generator_tuple_1(): def test_generator_tuple_2(): """ - test generator tuple 2 + Feature: Epoch Control op + Description: Test tuple iterator with num_epochs=-1 for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output and iterator never shutdown """ logger.info("Test 1D Generator : 0 - 63") @@ -381,7 +414,9 @@ def test_generator_tuple_2(): def test_generator_tuple_3(): """ - test generator tuple 3 + Feature: Epoch Control op + Description: Test tuple iterator with num_epochs=-1 followed by stop for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output before stop, 
then error is raised """ logger.info("Test 1D Generator : 0 - 63") @@ -406,7 +441,10 @@ def test_generator_tuple_3(): def test_generator_tuple_4(): """ - test generator tuple 4 + Feature: Epoch Control op + Description: Test tuple iterator by fetching data beyond the specified num_epochs for 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -429,7 +467,11 @@ def test_generator_tuple_4(): def test_generator_tuple_5(): """ - test generator tuple 5 + Feature: Epoch Control op + Description: Test tuple iterator by fetching data below (2 loops) then + beyond the specified number of epochs using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -462,7 +504,11 @@ def test_generator_tuple_5(): # Test with repeat def test_generator_tuple_repeat_1(): """ - test generator tuple repeat 1 + Feature: Epoch Control op + Description: Test tuple iterator by applying Repeat op first, next fetching data below (2 loops) then + beyond the specified number of epochs using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -496,7 +542,11 @@ def test_generator_tuple_repeat_1(): # Test with repeat def test_generator_tuple_repeat_repeat_1(): """ - test generator tuple repeat repeat 1 + Feature: Epoch Control op + Description: Test tuple iterator by applying Repeat op first twice, next fetching data below (2 loops) then + beyond the specified number of epochs using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output when fetching 
data under the specified num_epochs, + then error is raised due to EOF buffer encountered """ logger.info("Test 1D Generator : 0 - 63") @@ -530,7 +580,10 @@ def test_generator_tuple_repeat_repeat_1(): def test_generator_tuple_repeat_repeat_2(): """ - test generator tuple repeat repeat 2 + Feature: Epoch Control op + Description: Test tuple iterator with num_epochs=-1 by applying Repeat op first twice, next + stop op is called on the iterator using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output before stop is called, then error is raised """ logger.info("Test 1D Generator : 0 - 63") @@ -557,7 +610,10 @@ def test_generator_tuple_repeat_repeat_2(): def test_generator_tuple_repeat_repeat_3(): """ - test generator tuple repeat repeat 3 + Feature: Epoch Control op + Description: Test tuple iterator by applying Repeat op first twice, then do 2 loops + that the sum of iteration is equal to the specified num_epochs using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -587,7 +643,10 @@ def test_generator_tuple_repeat_repeat_3(): def test_generator_tuple_infinite_repeat_repeat_1(): """ - test generator tuple infinite repeat repeat 1 + Feature: Epoch Control op + Description: Test tuple iterator by applying infinite Repeat then Repeat with specified num_repeat, + then iterate using iterator using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -610,7 +669,10 @@ def test_generator_tuple_infinite_repeat_repeat_1(): def test_generator_tuple_infinite_repeat_repeat_2(): """ - test generator tuple infinite repeat repeat 2 + Feature: Epoch Control op + Description: Test tuple iterator by applying Repeat with specified num_repeat then infinite Repeat, + then iterate using iterator using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D 
Generator : 0 - 63") @@ -633,7 +695,10 @@ def test_generator_tuple_infinite_repeat_repeat_2(): def test_generator_tuple_infinite_repeat_repeat_3(): """ - test generator tuple infinite repeat repeat 3 + Feature: Epoch Control op + Description: Test tuple iterator by applying infinite Repeat first twice, + then iterate using iterator using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -656,7 +721,10 @@ def test_generator_tuple_infinite_repeat_repeat_3(): def test_generator_tuple_infinite_repeat_repeat_4(): """ - test generator tuple infinite repeat repeat 4 + Feature: Epoch Control op + Description: Test tuple iterator with num_epochs=1 by applying infinite Repeat first twice, + then iterate using iterator using 1D GeneratorDataset 0-63 + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") @@ -679,7 +747,10 @@ def test_generator_tuple_infinite_repeat_repeat_4(): def test_generator_reusedataset(): """ - test generator reusedataset + Feature: Epoch Control op + Description: Test iterator and other op (Repeat/Batch) on 1D GeneratorDataset 0-63 which previously + has been applied with iterator and other op (Repeat/Batch) + Expectation: Output is equal to the expected output """ logger.info("Test 1D Generator : 0 - 63") diff --git a/tests/ut/python/dataset/test_fill_op.py b/tests/ut/python/dataset/test_fill_op.py index a0f9f953522..f6393579888 100644 --- a/tests/ut/python/dataset/test_fill_op.py +++ b/tests/ut/python/dataset/test_fill_op.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -22,6 +22,11 @@ import mindspore.dataset.transforms as data_trans def test_fillop_basic(): + """ + Feature: Fill op + Description: Test Fill op basic usage (positive int onto an array of uint8) + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array([4, 5, 6, 7], dtype=np.uint8),) @@ -35,6 +40,11 @@ def test_fillop_basic(): def test_fillop_down_type_cast(): + """ + Feature: Fill op + Description: Test Fill op with a negative number onto an array of unsigned int8 + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array([4, 5, 6, 7], dtype=np.uint8),) @@ -48,6 +58,11 @@ def test_fillop_down_type_cast(): def test_fillop_up_type_cast(): + """ + Feature: Fill op + Description: Test Fill op with a int onto an array of floats + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array([4, 5, 6, 7], dtype=np.float),) @@ -61,6 +76,11 @@ def test_fillop_up_type_cast(): def test_fillop_string(): + """ + Feature: Fill op + Description: Test Fill op with a string onto an array of strings + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array(["45555", "45555"], dtype='S'),) @@ -74,6 +94,11 @@ def test_fillop_string(): def test_fillop_bytes(): + """ + Feature: Fill op + Description: Test Fill op with bytes onto an array of strings + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array(["A", "B", "C"], dtype='S'),) @@ -87,6 +112,11 @@ def test_fillop_bytes(): def test_fillop_error_handling(): + """ + Feature: Fill op + Description: Test Fill op with a mismatch data type (string onto an array of ints) + Expectation: Error is raised as expected + """ def gen(): yield (np.array([4, 4, 4, 4]),) diff --git a/tests/ut/python/dataset/test_filterop.py b/tests/ut/python/dataset/test_filterop.py index a38862fe0bd..ce6ebdf9aeb 100644 --- a/tests/ut/python/dataset/test_filterop.py +++ b/tests/ut/python/dataset/test_filterop.py @@ -1,4 +1,4 
@@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,8 +22,12 @@ DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" -# test for predicate def test_diff_predicate_func(): + """ + Feature: Filter op + Description: Test Filter op using predicate function as an arg + Expectation: Output is equal to the expected output + """ def test_filter(predicate_func): transforms = [ cde.Decode(), @@ -58,8 +62,12 @@ def generator_1d(): yield (np.array(i),) -# test with GeneratorDataset def test_filter_by_generator_with_no(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) num_iter = 0 @@ -69,8 +77,12 @@ def test_filter_by_generator_with_no(): num_iter += 1 -# test with repeatOp before def test_filter_by_generator_with_repeat(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Repeat op before + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_r = dataset.repeat(4) dataset_f = dataset_r.filter(predicate=filter_func_ge, num_parallel_workers=4) @@ -87,8 +99,12 @@ def test_filter_by_generator_with_repeat(): assert ret_data[index] == expected_rs[ii] -# test with repeatOp after def test_filter_by_generator_with_repeat_after(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Repeat op after + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_f = 
dataset.filter(predicate=filter_func_ge, num_parallel_workers=4) dataset_r = dataset_f.repeat(4) @@ -113,8 +129,12 @@ def filter_func_batch_after(data): return data <= 20 -# test with batchOp before def test_filter_by_generator_with_batch(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Batch op before + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_b = dataset.batch(4) dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4) @@ -129,8 +149,12 @@ def test_filter_by_generator_with_batch(): assert ret_data[2][0] == 8 -# test with batchOp after def test_filter_by_generator_with_batch_after(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Batch op after + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_f = dataset.filter(predicate=filter_func_batch_after, num_parallel_workers=4) dataset_b = dataset_f.batch(4) @@ -149,8 +173,12 @@ def filter_func_shuffle(data): return data <= 20 -# test with batchOp before def test_filter_by_generator_with_shuffle(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Shuffle op before + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_s = dataset.shuffle(4) dataset_f = dataset_s.filter(predicate=filter_func_shuffle, num_parallel_workers=4) @@ -164,8 +192,12 @@ def filter_func_shuffle_after(data): return data <= 20 -# test with batchOp after def test_filter_by_generator_with_shuffle_after(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Shuffle op after + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_f = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4) 
dataset_s = dataset_f.shuffle(4) @@ -194,8 +226,12 @@ def filter_func_zip_after(data1): return data1 <= 20 -# test with zipOp before def test_filter_by_generator_with_zip(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Zip op before + Expectation: Output is equal to the expected output + """ dataset1 = ds.GeneratorDataset(generator_1d_zip1, ["data1"]) dataset2 = ds.GeneratorDataset(generator_1d_zip2, ["data2"]) dataz = ds.zip((dataset1, dataset2)) @@ -212,8 +248,12 @@ def test_filter_by_generator_with_zip(): assert ret_data[5]["data2"] == 105 -# test with zipOp after def test_filter_by_generator_with_zip_after(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Zip op after + Expectation: Output is equal to the expected output + """ dataset1 = ds.GeneratorDataset(generator_1d_zip1, ["data1"]) dataset2 = ds.GeneratorDataset(generator_1d_zip1, ["data2"]) dt1 = dataset1.filter(predicate=filter_func_zip_after, num_parallel_workers=4) @@ -258,8 +298,12 @@ def func_map_part(data_col1): return data_col1 -# test with map def test_filter_by_generator_with_map_all_col(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Map op before and Filter op is applied to all input columns + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"]) # dataset_map = dataset.map(operations=func_map_part) @@ -274,8 +318,13 @@ def test_filter_by_generator_with_map_all_col(): assert ret_data[1] == 1 -# test with map def test_filter_by_generator_with_map_part_col(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Map op before. 
+ Filter op is only applied partially to the input columns + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"]) @@ -294,8 +343,12 @@ def filter_func_rename(data): return data > 8 -# test with rename before def test_filter_by_generator_with_rename(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Rename op before + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset_b = dataset.rename(input_columns=["data"], output_columns=["col1"]) dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4) @@ -309,7 +362,6 @@ def test_filter_by_generator_with_rename(): assert ret_data[54] == 63 -# test input_column def filter_func_input_column1(col1, col2): _ = col2 return col1[0] < 8 @@ -324,8 +376,12 @@ def filter_func_input_column3(col1): return True -# test with input_columns def test_filter_by_generator_with_input_column(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with input columns + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"]) dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"]) dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1, @@ -343,7 +399,6 @@ def test_filter_by_generator_with_input_column(): assert ret_data[7] == 7 -# test kFilterPartial def generator_mc_p0(maxid=20): for i in range(maxid): yield (np.array([i]), np.array([i + 100])) @@ -362,8 +417,13 @@ def filter_func_Partial_0(col1, col2, col3, col4): return True -# test with row_data_buffer > 1 def test_filter_by_generator_Partial0(): + """ + Feature: Filter op + Description: Test Filter op using 
GeneratorDataset with Zip op before. + Filter op is only partially applied on the input columns + Expectation: Output is equal to the expected output + """ dataset1 = ds.GeneratorDataset(source=generator_mc_p0(), column_names=["col1", "col2"]) dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"]) dataset_zip = ds.zip((dataset1, dataset2)) @@ -375,8 +435,13 @@ def test_filter_by_generator_Partial0(): assert ret[6] == 12 -# test with row_data_buffer > 1 def test_filter_by_generator_Partial1(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Zip op before and Map op after. + Filter op is only partially applied on the input columns + Expectation: Output is equal to the expected output + """ dataset1 = ds.GeneratorDataset(source=generator_mc_p0(), column_names=["col1", "col2"]) dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"]) dataset_zip = ds.zip((dataset1, dataset2)) @@ -389,8 +454,13 @@ def test_filter_by_generator_Partial1(): assert ret[6] == 412 -# test with row_data_buffer > 1 def test_filter_by_generator_Partial2(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Zip op after and Map op after the Zip op. + Filter op is only partially applied on the input columns + Expectation: Output is equal to the expected output + """ dataset1 = ds.GeneratorDataset(source=generator_mc_p0(), column_names=["col1", "col2"]) dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"]) @@ -421,8 +491,13 @@ def generator_big(maxid=20): yield (np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]])) -# test with row_data_buffer > 1 def test_filter_by_generator_Partial(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Shuffle op before. 
+ Filter op is only partially applied on the input columns + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(source=(lambda: generator_mc(99)), column_names=["col1", "col2"]) dataset_s = dataset.shuffle(4) dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1) @@ -436,8 +511,12 @@ def filter_func_cifar(col1, col2): return col2 % 3 == 0 -# test with cifar10 def test_filte_case_dataset_cifar10(): + """ + Feature: Filter op + Description: Test Filter op using Cifar10Dataset + Expectation: Output is equal to the expected output + """ DATA_DIR_10 = "../data/dataset/testCifar10Data" dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False) dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1) @@ -446,8 +525,6 @@ def test_filte_case_dataset_cifar10(): assert item["label"] % 3 == 0 -# column id sort - def generator_sort1(maxid=20): for i in range(maxid): yield (np.array([i]), np.array([i + 100]), np.array([i + 200])) @@ -468,6 +545,11 @@ def filter_func_map_sort(col1, col2, col3): def test_filter_by_generator_with_map_all_sort(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with Zip op before, Filter op is applied to all input columns + Expectation: Output is equal to the expected output + """ dataset1 = ds.GeneratorDataset(generator_sort1(10), ["col1", "col2", "col3"]) dataset2 = ds.GeneratorDataset(generator_sort2(10), ["col4 ", "col5", "col6"]) @@ -485,6 +567,11 @@ def test_filter_by_generator_with_map_all_sort(): def test_filter_by_generator_get_dataset_size(): + """ + Feature: Filter op + Description: Test Filter op using GeneratorDataset with get_dataset_size after + Expectation: Output is equal to the expected output + """ dataset = ds.GeneratorDataset(generator_1d, ["data"]) dataset = dataset.filter(predicate=filter_func_shuffle_after, 
num_parallel_workers=4) data_sie = dataset.get_dataset_size() diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index a7dd241fb83..f2f80ddbce4 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -164,7 +164,11 @@ def add_and_remove_nlp_compress_file(): def test_nlp_compress_data(add_and_remove_nlp_compress_file): - """tutorial for nlp minderdataset.""" + """ + Feature: MindDataset + Description: Test compressing NLP MindDataset + Expectation: Output is equal to the expected output + """ data = [] for row_id in range(16): data.append({ @@ -196,7 +200,11 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): def test_cv_minddataset_writer_tutorial(): - """tutorial for cv dataset writer.""" + """ + Feature: MindDataset + Description: Test MindDataset FileWriter basic usage + Expectation: Runs successfully + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] @@ -226,7 +234,11 @@ def test_cv_minddataset_writer_tutorial(): def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards and shard_id) on MindDataset basic usage + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -249,7 +261,11 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): def 
test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards and shard_id) on MindDataset with num_samples=1 + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -275,7 +291,12 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards and shard_id) on MindDataset + with num_samples > 1 but num_samples <= dataset size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -301,7 +322,12 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards and shard_id) on MindDataset + with num_samples > 1 but num_samples > dataset size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -325,8 +351,14 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): assert partitions(5, 2) == 2 assert partitions(9, 2) == 2 + def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards=1 and shard_id) on MindDataset + with num_samples > 1 and num_samples = dataset size + 
Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -342,8 +374,14 @@ def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file): assert num_iter == 5 + def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards=1 and shard_id) on MindDataset + and check that the result is not shuffled + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 num_shards = 3 @@ -383,7 +421,12 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoch(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test partition (using num_shards=1 and shard_id) on MindDataset + and check that the whole result under multiple epochs is not shuffled + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -419,7 +462,11 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset after Repeat op is applied and check that the result is not shuffled + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -514,7 +561,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): 
def test_cv_minddataset_dataset_size(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test get_dataset_size on MindDataset + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -538,7 +589,11 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset where after multiple Map ops and repeat op the result is not shuffled + Expectation: Output is equal to the expected output + """ columns_list = ["data", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -570,7 +625,11 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test MindDataset when batch_size in Batch op is larger than records + Expectation: Output is equal to the expected output + """ columns_list = ["data", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -595,7 +654,11 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): def test_cv_minddataset_issue_888(add_and_remove_cv_file): - """issue 888 test.""" + """ + Feature: MindDataset + Description: Test MindDataset by applying Shuffle op followed by Repeat op + Expectation: Output is equal to the expected output + """ columns_list = ["data", "label"] num_readers = 2 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -609,7 +672,11 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file): def 
test_cv_minddataset_reader_file_list(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset using list of files + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -633,7 +700,11 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file): def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset using list of one file + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -656,19 +727,23 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" - CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord" - CV2_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord" + """ + Feature: MindDataset + Description: Test read on MindDataset using 2 datasets that are written with FileWriter + Expectation: Output is equal to the expected output + """ + cv1_file_name = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord" + cv2_file_name = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord" try: - if os.path.exists(CV1_FILE_NAME): - os.remove(CV1_FILE_NAME) - if os.path.exists("{}.db".format(CV1_FILE_NAME)): - os.remove("{}.db".format(CV1_FILE_NAME)) - if os.path.exists(CV2_FILE_NAME): - os.remove(CV2_FILE_NAME) - if os.path.exists("{}.db".format(CV2_FILE_NAME)): - os.remove("{}.db".format(CV2_FILE_NAME)) - writer = FileWriter(CV1_FILE_NAME, 1) + if 
os.path.exists(cv1_file_name): + os.remove(cv1_file_name) + if os.path.exists("{}.db".format(cv1_file_name)): + os.remove("{}.db".format(cv1_file_name)) + if os.path.exists(cv2_file_name): + os.remove(cv2_file_name) + if os.path.exists("{}.db".format(cv2_file_name)): + os.remove("{}.db".format(cv2_file_name)) + writer = FileWriter(cv1_file_name, 1) data = get_data(CV_DIR_NAME) cv_schema_json = {"id": {"type": "int32"}, "file_name": {"type": "string"}, @@ -679,7 +754,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): writer.write_raw_data(data) writer.commit() - writer = FileWriter(CV2_FILE_NAME, 1) + writer = FileWriter(cv2_file_name, 1) data = get_data(CV_DIR_NAME) cv_schema_json = {"id": {"type": "int32"}, "file_name": {"type": "string"}, @@ -692,7 +767,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] - data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME], + data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [cv1_file_name, cv2_file_name], columns_list, num_readers) assert data_set.get_dataset_size() == 30 num_iter = 0 @@ -710,29 +785,34 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): num_iter += 1 assert num_iter == 30 except Exception as error: - if os.path.exists(CV1_FILE_NAME): - os.remove(CV1_FILE_NAME) - if os.path.exists("{}.db".format(CV1_FILE_NAME)): - os.remove("{}.db".format(CV1_FILE_NAME)) - if os.path.exists(CV2_FILE_NAME): - os.remove(CV2_FILE_NAME) - if os.path.exists("{}.db".format(CV2_FILE_NAME)): - os.remove("{}.db".format(CV2_FILE_NAME)) + if os.path.exists(cv1_file_name): + os.remove(cv1_file_name) + if os.path.exists("{}.db".format(cv1_file_name)): + os.remove("{}.db".format(cv1_file_name)) + if os.path.exists(cv2_file_name): + os.remove(cv2_file_name) + if 
os.path.exists("{}.db".format(cv2_file_name)): + os.remove("{}.db".format(cv2_file_name)) raise error else: - if os.path.exists(CV1_FILE_NAME): - os.remove(CV1_FILE_NAME) - if os.path.exists("{}.db".format(CV1_FILE_NAME)): - os.remove("{}.db".format(CV1_FILE_NAME)) - if os.path.exists(CV2_FILE_NAME): - os.remove(CV2_FILE_NAME) - if os.path.exists("{}.db".format(CV2_FILE_NAME)): - os.remove("{}.db".format(CV2_FILE_NAME)) + if os.path.exists(cv1_file_name): + os.remove(cv1_file_name) + if os.path.exists("{}.db".format(cv1_file_name)): + os.remove("{}.db".format(cv1_file_name)) + if os.path.exists(cv2_file_name): + os.remove(cv2_file_name) + if os.path.exists("{}.db".format(cv2_file_name)): + os.remove("{}.db".format(cv2_file_name)) def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): - CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1" - paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0')) + """ + Feature: MindDataset + Description: Test read on MindDataset using two datasets that are partitioned into two lists + Expectation: Output is equal to the expected output + """ + cv1_file_name = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1" + paths = ["{}{}".format(cv1_file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -740,7 +820,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(CV1_FILE_NAME, FILES_NUM) + writer = FileWriter(cv1_file_name, FILES_NUM) data = get_data(CV_DIR_NAME) cv_schema_json = {"id": {"type": "int32"}, "file_name": {"type": "string"}, @@ -755,7 +835,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data_set = ds.MindDataset([file_name + str(x) for x in 
range(2)] + - [CV1_FILE_NAME + str(x) for x in range(2, 4)], + [cv1_file_name + str(x) for x in range(2, 4)], columns_list, num_readers) assert data_set.get_dataset_size() < 20 num_iter = 0 @@ -784,7 +864,11 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read on MindDataset tutorial + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -807,7 +891,11 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): - """tutorial for nlp minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read on NLP MindDataset tutorial + Expectation: Output is equal to the expected output + """ num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data_set = ds.MindDataset(file_name + "0", None, num_readers) @@ -836,7 +924,11 @@ def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read on MindDataset tutorial with 5 epochs + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -853,7 +945,11 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file): def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read on 
MindDataset tutorial with 5 epochs after Batch op + Expectation: Output is equal to the expected output + """ columns_list = ["data", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -882,7 +978,11 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset with no columns_list + Expectation: Output is equal to the expected output + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data_set = ds.MindDataset(file_name + "0") assert data_set.get_dataset_size() == 10 @@ -903,7 +1003,11 @@ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset after Repeat op is applied on the dataset + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -1117,6 +1221,11 @@ def inputs(vectors, maxlen=50): def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): + """ + Feature: MindDataset + Description: Test write multiple bytes and arrays using FileWriter and read them by MindDataset + Expectation: Output is equal to the expected output + """ mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] try: if os.path.exists("{}".format(mindrecord_file_name)): @@ -1373,6 +1482,11 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): def test_write_with_multi_bytes_and_MindDataset(): + """ + Feature: MindDataset + Description: Test write multiple bytes using FileWriter and read them by MindDataset + 
Expectation: Output is equal to the expected output + """ mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] try: data = [{"file_name": "001.jpg", "label": 43, @@ -1554,6 +1668,11 @@ def test_write_with_multi_bytes_and_MindDataset(): def test_write_with_multi_array_and_MindDataset(): + """ + Feature: MindDataset + Description: Test write multiple arrays using FileWriter and read them by MindDataset + Expectation: Output is equal to the expected output + """ mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] try: data = [{"source_sos_ids": np.array([1, 2, 3, 4, 5], dtype=np.int64), @@ -1757,6 +1876,11 @@ def test_write_with_multi_array_and_MindDataset(): def test_numpy_generic(): + """ + Feature: MindDataset + Description: Test write numpy generic data types using FileWriter and read them by MindDataset + Expectation: Output is equal to the expected output + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] @@ -1804,6 +1928,12 @@ def test_numpy_generic(): def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(): + """ + Feature: MindDataset + Description: Test write float32, float64, array of float32, and array of float64 using + FileWriter and read them by MindDataset + Expectation: Output is equal to the expected output + """ mindrecord_file_name = "test_write_with_float32_float64_float32_array_float64_array_and_MindDataset.mindrecord" try: data = [{"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32), @@ -1996,7 +2126,13 @@ def create_multi_mindrecord_files(): os.remove("{}".format(filename)) os.remove("{}.db".format(filename)) + def test_shuffle_with_global_infile_files(create_multi_mindrecord_files): + """ + Feature: MindDataset + Description: Test without and with shuffle args for MindDataset + Expectation: Output is 
equal to the expected output + """ ds.config.set_seed(1) datas_all = [] index = 0 @@ -2233,7 +2369,13 @@ def test_shuffle_with_global_infile_files(create_multi_mindrecord_files): shard_count += 1 assert origin_index != current_index + def test_distributed_shuffle_with_global_infile_files(create_multi_mindrecord_files): + """ + Feature: MindDataset + Description: Test distributed MindDataset (with num_shards and shard_id) without and with shuffle args + Expectation: Output is equal to the expected output + """ ds.config.set_seed(1) datas_all = [] datas_all_samples = [] @@ -2425,7 +2567,14 @@ def test_distributed_shuffle_with_global_infile_files(create_multi_mindrecord_fi shard_count += 1 assert origin_index != current_index + def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files): + """ + Feature: MindDataset + Description: Test distributed MindDataset (with num_shards and shard_id) + without and with shuffle args under multiple epochs + Expectation: Output is equal to the expected output + """ ds.config.set_seed(1) datas_all = [] datas_all_samples = [] @@ -2588,8 +2737,13 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files): assert datas_epoch2 not in (datas_epoch1, datas_epoch3) assert datas_epoch3 not in (datas_epoch2, datas_epoch1) + def test_field_is_null_numpy(): - """add/remove nlp file""" + """ + Feature: MindDataset + Description: Test MindDataset when field array_d is null + Expectation: Output is equal to the expected output + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] @@ -2655,8 +2809,13 @@ def test_field_is_null_numpy(): os.remove("{}".format(x)) os.remove("{}.db".format(x)) + def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file): - """test for loop dataset iterator""" + """ + Feature: MindDataset + Description: Test for loop for iterator based on MindDataset + 
Expectation: Output is equal to the expected output + """ data = [] for row_id in range(16): data.append({ diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py index 3c48e5055e7..8455d60c61a 100644 --- a/tests/ut/python/dataset/test_minddataset_exception.py +++ b/tests/ut/python/dataset/test_minddataset_exception.py @@ -74,7 +74,11 @@ def create_diff_page_size_cv_mindrecord(file_name, files_num): def test_cv_lack_json(): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test MindDataset using json file that does not exist + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] create_cv_mindrecord(file_name, 1) columns_list = ["data", "file_name", "label"] @@ -87,7 +91,11 @@ def test_cv_lack_json(): def test_cv_lack_mindrecord(): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test MindDataset using mindrecord that does not exist or no permission + Expectation: Exception is raised as expected + """ columns_list = ["data", "file_name", "label"] num_readers = 4 with pytest.raises(Exception, match="does not exist or permission denied"): @@ -95,6 +103,11 @@ def test_cv_lack_mindrecord(): def test_invalid_mindrecord(): + """ + Feature: MindDataset + Description: Test MindDataset using invalid file (size of mindrecord file header is larger than the upper limit) + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] with open(file_name, 'w') as f: f.write('just for test') @@ -109,6 +122,11 @@ def test_invalid_mindrecord(): def test_minddataset_lack_db(): + """ + Feature: MindDataset + Description: Test MindDataset without .db files + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] 
create_cv_mindrecord(file_name, 1) os.remove("{}.db".format(file_name)) @@ -140,6 +158,11 @@ def test_cv_minddataset_pk_sample_error_class_column(): def test_cv_minddataset_pk_sample_exclusive_shuffle(): + """ + Feature: MindDataset + Description: Test MindDataset by specifying sampler and shuffle at the same time + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] create_cv_mindrecord(file_name, 1) columns_list = ["data", "file_name", "label"] @@ -156,6 +179,11 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle(): def test_cv_minddataset_reader_different_schema(): + """ + Feature: MindDataset + Description: Test MindDataset by including a file that has a different schema from the others + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name_1 = file_name + '_1' create_cv_mindrecord(file_name, 1) @@ -177,6 +205,11 @@ def test_cv_minddataset_reader_different_schema(): def test_cv_minddataset_reader_different_page_size(): + """ + Feature: MindDataset + Description: Test MindDataset where one of the files has a different page size + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] file_name_1 = file_name + '_1' create_cv_mindrecord(file_name, 1) @@ -199,6 +232,11 @@ def test_cv_minddataset_reader_different_page_size(): def test_minddataset_invalidate_num_shards(): + """ + Feature: MindDataset + Description: Test MindDataset where num_shards is invalid + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] create_cv_mindrecord(file_name, 1) columns_list = ["data", "label"] @@ -222,6 +260,11 @@ def test_minddataset_invalidate_num_shards(): def test_minddataset_invalidate_shard_id(): + """ + Feature: MindDataset + Description: Test MindDataset 
where shard_id is invalid + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] create_cv_mindrecord(file_name, 1) columns_list = ["data", "label"] @@ -245,6 +288,11 @@ def test_minddataset_invalidate_shard_id(): def test_minddataset_shard_id_bigger_than_num_shard(): + """ + Feature: MindDataset + Description: Test MindDataset where shard_id is bigger than num_shards + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] create_cv_mindrecord(file_name, 1) columns_list = ["data", "label"] @@ -282,7 +330,11 @@ def test_minddataset_shard_id_bigger_than_num_shard(): def test_cv_minddataset_partition_num_samples_equals_0(): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test MindDataset where num_samples is invalid + Expectation: Exception is raised as expected + """ file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] create_cv_mindrecord(file_name, 1) columns_list = ["data", "label"] @@ -312,8 +364,11 @@ def test_cv_minddataset_partition_num_samples_equals_0(): def test_mindrecord_exception(): - """tutorial for exception scenario of minderdataset + map would print error info.""" - + """ + Feature: MindDataset + Description: Test MindDataset by mapping function that will raise Exception and print error info + Expectation: Exception is raised as expected + """ def exception_func(item): raise Exception("Error occur!") diff --git a/tests/ut/python/dataset/test_minddataset_padded.py b/tests/ut/python/dataset/test_minddataset_padded.py index a268adf4eee..79f4e1b3629 100644 --- a/tests/ut/python/dataset/test_minddataset_padded.py +++ b/tests/ut/python/dataset/test_minddataset_padded.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the 
"License"); # you may not use this file except in compliance with the License. @@ -107,7 +107,11 @@ def add_and_remove_nlp_file(): def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read on MindDataset with padded_sample + Expectation: Output is equal to the expected output + """ columns_list = ["label", "file_name", "data"] data = get_data(CV_DIR_NAME) @@ -135,7 +139,11 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): assert num_iter == 15 def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read on MindDataset with padded_sample which file_name requires type cast + Expectation: Output is equal to the expected output + """ columns_list = ["label", "file_name", "data"] data = get_data(CV_DIR_NAME) @@ -164,7 +172,11 @@ def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset with padded_sample and partition (num_shards and shard_id) + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) @@ -205,7 +217,12 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset with padded_sample and partition (num_shards and shard_id), + performed under multiple epochs + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) @@ -278,7 +295,12 @@ 
def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file): - """tutorial for cv minddataset.""" + """ + Feature: MindDataset + Description: Test read on MindDataset with padded_sample and partition (num_shards and shard_id), + where num_padded is not divisible + Expectation: Error is raised as expected + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) @@ -305,6 +327,12 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test get_dataset_size during MindDataset read with padded_sample and partition + (num_shards and shard_id), where num_padded is not divisible + Expectation: Error is raised as expected + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) @@ -328,6 +356,12 @@ def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_a def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with padded_sample and partition + (num_shards and shard_id), where padded_sample does not match columns_list + Expectation: Error is raised as expected + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) @@ -355,6 +389,12 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with padded_sample and partition + (num_shards and shard_id), where there is no columns_list + Expectation: Error is raised as expected + """ data = get_data(CV_DIR_NAME) padded_sample = data[0] padded_sample['label'] = -2 @@ -380,6 
+420,12 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with padded_sample and partition + (num_shards and shard_id), where there is no num_padded + Expectation: Error is raised as expected + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) padded_sample = data[0] @@ -404,6 +450,12 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with padded_sample and partition + (num_shards and shard_id), where there is no padded_sample + Expectation: Error is raised as expected + """ columns_list = ["data", "file_name", "label"] data = get_data(CV_DIR_NAME) padded_sample = data[0] @@ -428,6 +480,11 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): + """ + Feature: MindDataset + Description: Test basic read MindDataset with padded_sample from raw data of aclImdb dataset + Expectation: Output is equal to the expected output + """ columns_list = ["input_ids", "id", "rating"] data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)] @@ -469,6 +526,11 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file): + """ + Feature: MindDataset + Description: Test basic read MindDataset with padded_sample from raw data of aclImdb dataset under multiple epochs + Expectation: Output is equal to the expected output + """ columns_list = ["input_ids", "id", "rating"] data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)] @@ -535,6 +597,12 @@ def 
test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file): + """ + Feature: MindDataset + Description: Test basic read MindDataset with padded_sample from raw data of aclImdb dataset + by checking whole result_per_epoch to ensure there is no reshuffling + Expectation: Output is equal to the expected output + """ columns_list = ["input_ids", "id", "rating"] padded_sample = {} diff --git a/tests/ut/python/dataset/test_minddataset_sampler.py b/tests/ut/python/dataset/test_minddataset_sampler.py index a41c7795fc8..94bf684fdda 100644 --- a/tests/ut/python/dataset/test_minddataset_sampler.py +++ b/tests/ut/python/dataset/test_minddataset_sampler.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -62,7 +62,11 @@ def add_and_remove_cv_file(): def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler without any columns_list in the dataset + Expectation: Output is equal to the expected output + """ num_readers = 4 sampler = ds.PKSampler(2) file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -82,7 +86,11 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read MindDataset with PKSampler + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(2) @@ -105,7 +113,11 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler with shuffle=True + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(3, None, True) @@ -127,7 +139,12 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler with shuffle=True and + with num_samples larger than get_dataset_size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(3, None, True, 'label', 5) @@ -149,7 +166,12 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): def 
test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler with shuffle=True and + with num_samples larger than get_dataset_size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(3, None, True, 'label', 10) @@ -171,7 +193,11 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler with shuffle=True and num_val that is out of range + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(5, None, True) @@ -192,7 +218,12 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler with shuffle=True, num_val that is out of range, and + num_samples larger than get_dataset_size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(5, None, True, 'label', 20) @@ -213,7 +244,12 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with PKSampler with shuffle=True, num_val that is out of range, and + num_samples that is equal to get_dataset_size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 
sampler = ds.PKSampler(5, None, True, 'label', 10) @@ -234,7 +270,11 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test basic read MindDataset with SubsetRandomSampler + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -259,7 +299,11 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with SubsetRandomSampler with duplicate index in the indices + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 indices = [1, 2, 2, 5, 7, 9] @@ -284,7 +328,11 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with SubsetRandomSampler with empty indices + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 indices = [] @@ -309,7 +357,11 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file): - """tutorial for cv minderdataset.""" + """ + Feature: MindDataset + Description: Test read MindDataset with SubsetRandomSampler with indices that are out of range + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 indices = [1, 2, 4, 
11, 13] @@ -334,6 +386,11 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with SubsetRandomSampler with negative indices + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 indices = [1, 2, 4, -1, -2] @@ -358,6 +415,11 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test basic read MindDataset with RandomSampler + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -384,6 +446,11 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with RandomSampler followed by Repeat op + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -419,6 +486,11 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with RandomSampler with replacement=True + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -441,6 +513,11 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file): + 
""" + Feature: MindDataset + Description: Test read MindDataset with RandomSampler with replacement=False and num_samples <= dataset size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -463,6 +540,11 @@ def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_fil def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with RandomSampler with replacement=False and num_samples > dataset size + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -485,6 +567,11 @@ def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_fil def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test basic read MindDataset with SequentialSampler + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -510,6 +597,11 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with SequentialSampler with offset on starting index + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -536,6 +628,12 @@ def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file): def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with 
SequentialSampler with offset on starting index and + num_samples > dataset size + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -562,6 +660,11 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): def test_cv_minddataset_split_basic(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test basic read MindDataset after Split op is applied + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -599,6 +702,11 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset after Split op is applied using exact percentages + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -636,6 +744,11 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset after Split op is applied using fuzzy percentages + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 @@ -673,6 +786,11 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset after deterministic Split op is applied + Expectation: Output is equal to the expected output + """ columns_list = ["data", "file_name", "label"] num_readers = 4 file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] @@ -714,6 +832,11 @@ 
def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): def test_cv_minddataset_split_sharding(add_and_remove_cv_file): + """ + Feature: MindDataset + Description: Test read MindDataset with DistributedSampler after deterministic Split op is applied + Expectation: Output is equal to the expected output + """ data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 diff --git a/tests/ut/python/dataset/test_nlp.py b/tests/ut/python/dataset/test_nlp.py index ad54672c297..7e59e8a0327 100644 --- a/tests/ut/python/dataset/test_nlp.py +++ b/tests/ut/python/dataset/test_nlp.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,6 +26,11 @@ MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" def test_on_tokenized_line(): + """ + Feature: Python text.Vocab class + Description: Test Lookup op on tokenized line using JiebaTokenizer with special_tokens + Expectation: Output is equal to the expected output + """ data = ds.TextFileDataset("../data/dataset/testVocab/lines.txt", shuffle=False) jieba_op = text.JiebaTokenizer(HMM_FILE, MP_FILE, mode=text.JiebaMode.MP) with open(VOCAB_FILE, 'r') as f: @@ -43,6 +48,11 @@ def test_on_tokenized_line(): def test_on_tokenized_line_with_no_special_tokens(): + """ + Feature: Python text.Vocab class + Description: Test Lookup op on tokenized line using JiebaTokenizer without special_tokens + Expectation: Output is equal to the expected output + """ data = ds.TextFileDataset("../data/dataset/testVocab/lines.txt", shuffle=False) jieba_op = text.JiebaTokenizer(HMM_FILE, MP_FILE, mode=text.JiebaMode.MP) with open(VOCAB_FILE, 'r') as f: diff --git a/tests/ut/python/dataset/test_noop_mode.py b/tests/ut/python/dataset/test_noop_mode.py index 0e2eaf40fe9..5dccc24b795 100644 --- 
a/tests/ut/python/dataset/test_noop_mode.py +++ b/tests/ut/python/dataset/test_noop_mode.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,11 @@ from mindspore import context DATA_DIR = "../data/dataset/testVOC2012" def test_noop_pserver(): + """ + Feature: No-op mode + Description: Test No-op mode support where the MS_ROLE environment is MS_PSERVER + Expectation: Runs successfully + """ os.environ['MS_ROLE'] = 'MS_PSERVER' context.set_ps_context(enable_ps=True) data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True) @@ -34,6 +39,11 @@ def test_noop_pserver(): def test_noop_sched(): + """ + Feature: No-op mode + Description: Test No-op mode support where the MS_ROLE environment is MS_SCHED + Expectation: Runs successfully + """ os.environ['MS_ROLE'] = 'MS_SCHED' context.set_ps_context(enable_ps=True) data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True) diff --git a/tests/ut/python/dataset/test_opt_pass.py b/tests/ut/python/dataset/test_opt_pass.py index f3402e3279b..59466337527 100644 --- a/tests/ut/python/dataset/test_opt_pass.py +++ b/tests/ut/python/dataset/test_opt_pass.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,6 +21,11 @@ import mindspore.dataset as ds # map dataset with columns order arguments should produce a ProjectOp over MapOp # This test does not utilize the compiling passes at this time. 
def test_map_reorder0(): + """ + Feature: Map op + Description: Test Map op by applying operation lambda x: x on GeneratorDataset + Expectation: Output is equal to the expected output + """ def generator_mc(maxid=1): for _ in range(maxid): yield (np.array([0]), np.array([1])) @@ -39,6 +44,11 @@ def test_map_reorder0(): # map dataset with columns order arguments should produce a ProjectOp over MapOp # This test does not utilize the compiling passes at this time. def test_map_reorder1(): + """ + Feature: Map op + Description: Test Map op on 2 mapped GeneratorDatasets that are zipped + Expectation: Output is equal to the expected output + """ def generator_mc(maxid=1): for _ in range(maxid): yield (np.array([0]), np.array([1]), np.array([2])) @@ -59,6 +69,11 @@ def test_map_reorder1(): # TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp. # This test does not utilize the compiling passes at this time. def test_shuffle(): + """ + Feature: Shuffle op + Description: Test one dataset with Shuffle.GLOBAL with another dataset with Shuffle.FILES followed by shuffle op + Expectation: Both datasets should be equal + """ FILES = ["../data/dataset/testTFTestAllTypes/test.data"] SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json" @@ -98,4 +113,4 @@ def test_shuffle(): if __name__ == "__main__": test_map_reorder0() test_map_reorder1() - test_global_shuffle() + test_shuffle() diff --git a/tests/ut/python/dataset/test_pad_batch.py b/tests/ut/python/dataset/test_pad_batch.py index f75b6b1ff04..38b62d2cb83 100644 --- a/tests/ut/python/dataset/test_pad_batch.py +++ b/tests/ut/python/dataset/test_pad_batch.py @@ -60,6 +60,11 @@ def gen_var_cols_2d(num): def test_batch_padding_01(): + """ + Feature: Batch Padding + Description: Test batch padding where input_shape=[x] and output_shape=[y] in which y > x + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = 
data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)}) data1 = data1.repeat(2) @@ -69,6 +74,12 @@ def test_batch_padding_01(): def test_batch_padding_02(): + """ + Feature: Batch Padding + Description: Test batch padding where padding in one dimension and truncate in the other, in which + input_shape=[x1,x2] and output_shape=[y1,y2] and y1 > x1 and y2 < x2 + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)}) data1 = data1.repeat(2) @@ -78,6 +89,11 @@ def test_batch_padding_02(): def test_batch_padding_03(): + """ + Feature: Batch Padding + Description: Test batch padding using automatic padding for a specific column + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_var_col(4)), ["col"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)}) # pad automatically data1 = data1.repeat(2) @@ -91,6 +107,11 @@ def test_batch_padding_03(): def test_batch_padding_04(): + """ + Feature: Batch Padding + Description: Test batch padding using default setting for all columns + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.repeat(2) @@ -100,6 +121,11 @@ def test_batch_padding_04(): def test_batch_padding_05(): + """ + Feature: Batch Padding + Description: Test batch padding where None is in different places + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = data1.batch(batch_size=3, drop_remainder=False, pad_info={"col2": ([2, None], -2), "col1": (None, -1)}) # pad automatically diff --git 
a/tests/ut/python/dataset/test_pad_end_op.py b/tests/ut/python/dataset/test_pad_end_op.py index 0011d0bf144..c089be4ccd8 100644 --- a/tests/ut/python/dataset/test_pad_end_op.py +++ b/tests/ut/python/dataset/test_pad_end_op.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -35,6 +35,11 @@ def pad_compare(array, pad_shape, pad_value, res): def test_pad_end_basics(): + """ + Feature: PadEnd op + Description: Test PadEnd op basic usage with array of ints + Expectation: Output is equal to the expected output + """ pad_compare([1, 2], [3], -1, [1, 2, -1]) pad_compare([1, 2, 3], [3], -1, [1, 2, 3]) pad_compare([1, 2, 3], [2], -1, [1, 2]) @@ -42,6 +47,11 @@ def test_pad_end_basics(): def test_pad_end_str(): + """ + Feature: PadEnd op + Description: Test PadEnd op basic usage with array of strings + Expectation: Output is equal to the expected output + """ pad_compare([b"1", b"2"], [3], b"-1", [b"1", b"2", b"-1"]) pad_compare([b"1", b"2", b"3"], [3], b"-1", [b"1", b"2", b"3"]) pad_compare([b"1", b"2", b"3"], [2], b"-1", [b"1", b"2"]) @@ -49,6 +59,11 @@ def test_pad_end_str(): def test_pad_end_exceptions(): + """ + Feature: PadEnd op + Description: Test PadEnd op with invalid inputs + Expectation: Correct error is raised as expected + """ with pytest.raises(RuntimeError) as info: pad_compare([1, 2], [3], "-1", []) assert "pad_value and item of dataset are not of the same type" in str(info.value) diff --git a/tests/ut/python/dataset/test_paddeddataset.py b/tests/ut/python/dataset/test_paddeddataset.py index 6788d9e3d89..79c2e53ea13 100644 --- a/tests/ut/python/dataset/test_paddeddataset.py +++ b/tests/ut/python/dataset/test_paddeddataset.py @@ -1,3 +1,18 @@ +# Copyright 2020-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + from io import BytesIO import copy import os @@ -39,13 +54,18 @@ def generator_30(): def test_TFRecord_Padded(): - DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] - SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" + """ + Feature: PaddedDataset + Description: Test padding PaddedDataset on TFRecordDataset + Expectation: Output is equal to the expected output + """ + data_dir = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] + schema_dir = "../data/dataset/test_tf_file_3_images/datasetSchema.json" result_list = [[159109, 2], [192607, 3], [179251, 4], [1, 5]] verify_list = [] shard_num = 4 for i in range(shard_num): - data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], + data = ds.TFRecordDataset(data_dir, schema_dir, columns_list=["image"], shuffle=False, shard_equal_rows=True) padded_samples = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(2, np.uint8)}, @@ -64,6 +84,11 @@ def test_TFRecord_Padded(): def test_GeneratorDataSet_Padded(): + """ + Feature: PaddedDataset + Description: Test padding GeneratorDataset with another GeneratorDataset + Expectation: Output is equal to the expected output + """ result_list = [] for i in range(10): tem_list = [] @@ -88,6 +113,11 @@ def test_GeneratorDataSet_Padded(): def test_Reapeat_afterPadded(): + """ + Feature: PaddedDataset + Description: Test padding 
PaddedDataset with another PaddedDataset + Expectation: Output is equal to the expected output + """ result_list = [1, 3, 5, 7] verify_list = [] @@ -112,6 +142,11 @@ def test_Reapeat_afterPadded(): def test_bath_afterPadded(): + """ + Feature: PaddedDataset + Description: Test padding PaddedDataset with another PaddedDataset followed by batch op + Expectation: Output is equal to the expected output + """ data1 = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}, {'image': np.zeros(1, np.uint8)}] @@ -130,6 +165,11 @@ def test_bath_afterPadded(): def test_Unevenly_distributed(): + """ + Feature: PaddedDataset + Description: Test padding PaddedDataset with another PaddedDataset that is unevenly distributed + Expectation: Output is equal to the expected output + """ result_list = [[1, 4, 7], [2, 5, 8], [3, 6]] verify_list = [] @@ -156,6 +196,11 @@ def test_Unevenly_distributed(): def test_three_datasets_connected(): + """ + Feature: PaddedDataset + Description: Test padding 3 connected GeneratorDatasets + Expectation: Output is equal to the expected output + """ result_list = [] for i in range(10): tem_list = [] @@ -182,6 +227,11 @@ def test_three_datasets_connected(): def test_raise_error(): + """ + Feature: PaddedDataset + Description: Test padding a PaddedDataset after a batch op with a PaddedDataset, then apply sampler op + Expectation: Correct error is raised as expected + """ data1 = [{'image': np.zeros(0, np.uint8)}, {'image': np.zeros(0, np.uint8)}, {'image': np.zeros(0, np.uint8)}, {'image': np.zeros(0, np.uint8)}, {'image': np.zeros(0, np.uint8)}] @@ -214,8 +264,13 @@ def test_raise_error(): assert excinfo.type == 'ValueError' def test_imagefolder_error(): - DATA_DIR = "../data/dataset/testPK/data" - data = ds.ImageFolderDataset(DATA_DIR, num_samples=14) + """ + Feature: PaddedDataset + Description: Test padding an ImageFolderDataset with num_samples with PaddedDataset + 
Expectation: Error is raised as expected + """ + data_dir = "../data/dataset/testPK/data" + data = ds.ImageFolderDataset(data_dir, num_samples=14) data1 = [{'image': np.zeros(1, np.uint8), 'label': np.array(0, np.int32)}, {'image': np.zeros(2, np.uint8), 'label': np.array(1, np.int32)}, @@ -232,8 +287,13 @@ def test_imagefolder_error(): assert excinfo.type == 'ValueError' def test_imagefolder_padded(): - DATA_DIR = "../data/dataset/testPK/data" - data = ds.ImageFolderDataset(DATA_DIR) + """ + Feature: PaddedDataset + Description: Test padding an ImageFolderDataset without num_samples with PaddedDataset + Expectation: Output is equal to the expected output + """ + data_dir = "../data/dataset/testPK/data" + data = ds.ImageFolderDataset(data_dir) data1 = [{'image': np.zeros(1, np.uint8), 'label': np.array(0, np.int32)}, {'image': np.zeros(2, np.uint8), 'label': np.array(1, np.int32)}, @@ -256,11 +316,16 @@ def test_imagefolder_padded(): def test_imagefolder_padded_with_decode(): + """ + Feature: PaddedDataset + Description: Test padding an ImageFolderDataset with PaddedDataset followed by a Decode op + Expectation: Output is equal to the expected output + """ num_shards = 5 count = 0 for shard_id in range(num_shards): - DATA_DIR = "../data/dataset/testPK/data" - data = ds.ImageFolderDataset(DATA_DIR) + data_dir = "../data/dataset/testPK/data" + data = ds.ImageFolderDataset(data_dir) white_io = BytesIO() Image.new('RGB', (224, 224), (255, 255, 255)).save(white_io, 'JPEG') @@ -285,11 +350,16 @@ def test_imagefolder_padded_with_decode(): def test_imagefolder_padded_with_decode_and_get_dataset_size(): + """ + Feature: PaddedDataset + Description: Test padding an ImageFolderDataset with PaddedDataset followed by get_dataset_size and a Decode op + Expectation: Output is equal to the expected output + """ num_shards = 5 count = 0 for shard_id in range(num_shards): - DATA_DIR = "../data/dataset/testPK/data" - data = ds.ImageFolderDataset(DATA_DIR) + data_dir = 
"../data/dataset/testPK/data" + data = ds.ImageFolderDataset(data_dir) white_io = BytesIO() Image.new('RGB', (224, 224), (255, 255, 255)).save(white_io, 'JPEG') @@ -316,6 +386,12 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size(): def test_more_shard_padded(): + """ + Feature: PaddedDataset + Description: Test padding GeneratorDataset with another GeneratorDataset and + PaddedDataset with another PaddedDataset with larger num_shards used + Expectation: Output is equal to the expected output + """ result_list = [] for i in range(8): result_list.append(1) @@ -429,6 +505,11 @@ def add_and_remove_cv_file(): def test_Mindrecord_Padded(remove_mindrecord_file): + """ + Feature: PaddedDataset + Description: Test padding an MindDataset with PaddedDataset + Expectation: Output is equal to the expected output + """ result_list = [] verify_list = [[1, 2], [3, 4], [5, 11], [6, 12], [7, 13], [8, 14], [9], [10]] num_readers = 4 @@ -453,7 +534,9 @@ def test_Mindrecord_Padded(remove_mindrecord_file): def test_clue_padded_and_skip_with_0_samples(): """ - Test num_samples param of CLUE dataset + Feature: PaddedDataset + Description: Test padding a CLUEDataset with PaddedDataset with and without samples + Expectation: Output is equal to the expected output except when dataset has no samples, in which error is raised """ TRAIN_FILE = '../data/dataset/testCLUE/afqmc/train.json' @@ -494,6 +577,11 @@ def test_clue_padded_and_skip_with_0_samples(): def test_celeba_padded(): + """ + Feature: PaddedDataset + Description: Test padding an CelebADataset with PaddedDataset + Expectation: Output is equal to the expected output + """ data = ds.CelebADataset("../data/dataset/testCelebAData/") padded_samples = [{'image': np.zeros(1, np.uint8), 'attr': np.zeros(1, np.uint32)}] @@ -517,6 +605,6 @@ if __name__ == '__main__': test_Unevenly_distributed() test_three_datasets_connected() test_raise_error() - test_imagefolden_padded() + test_imagefolder_padded() test_more_shard_padded() 
test_Mindrecord_Padded(add_and_remove_cv_file) diff --git a/tests/ut/python/dataset/test_pair_truncate.py b/tests/ut/python/dataset/test_pair_truncate.py index ae3225212cd..53232e5ed29 100644 --- a/tests/ut/python/dataset/test_pair_truncate.py +++ b/tests/ut/python/dataset/test_pair_truncate.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,6 +31,11 @@ def compare(in1, in2, length, out1, out2): def test_callable(): + """ + Feature: TruncateSequencePair op + Description: Test TruncateSequencePair op using an array of arrays or multiple arrays as the input + Expectation: Output is equal to the expected output + """ op = text.TruncateSequencePair(3) data = [["1", "2", "3"], ["4", "5"]] result_text = op(*data) @@ -42,6 +47,11 @@ def test_callable(): def test_basics(): + """ + Feature: TruncateSequencePair op + Description: Test TruncateSequencePair op basic usage + Expectation: Output is equal to the expected output + """ compare(in1=[1, 2, 3], in2=[4, 5], length=4, out1=[1, 2], out2=[4, 5]) compare(in1=[1, 2], in2=[4, 5], length=4, out1=[1, 2], out2=[4, 5]) compare(in1=[1], in2=[4], length=4, out1=[1], out2=[4]) @@ -50,6 +60,11 @@ def test_basics(): def test_basics_odd(): + """ + Feature: TruncateSequencePair op + Description: Test TruncateSequencePair op basic usage when the length is an odd number > 1 + Expectation: Output is equal to the expected output + """ compare(in1=[1, 2, 3], in2=[4, 5], length=3, out1=[1, 2], out2=[4]) compare(in1=[1, 2], in2=[4, 5], length=3, out1=[1, 2], out2=[4]) compare(in1=[1], in2=[4], length=5, out1=[1], out2=[4]) @@ -58,6 +73,11 @@ def test_basics_odd(): def test_basics_str(): + """ + Feature: TruncateSequencePair op + Description: Test TruncateSequencePair op basic usage when the inputs are array of strings + Expectation: 
Output is equal to the expected output + """ compare(in1=[b"1", b"2", b"3"], in2=[4, 5], length=4, out1=[b"1", b"2"], out2=[4, 5]) compare(in1=[b"1", b"2"], in2=[b"4", b"5"], length=4, out1=[b"1", b"2"], out2=[b"4", b"5"]) compare(in1=[b"1"], in2=[4], length=4, out1=[b"1"], out2=[4]) @@ -66,6 +86,11 @@ def test_basics_str(): def test_exceptions(): + """ + Feature: TruncateSequencePair op + Description: Test TruncateSequencePair op with length=1 + Expectation: Output is equal to the expected output + """ compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1], out2=[]) diff --git a/tests/ut/python/dataset/test_profiling.py b/tests/ut/python/dataset/test_profiling.py index b815fb311b0..fb3e502715f 100644 --- a/tests/ut/python/dataset/test_profiling.py +++ b/tests/ut/python/dataset/test_profiling.py @@ -118,9 +118,10 @@ class TestMinddataProfilingManager: def test_profiling_simple_pipeline(self, tmp_path): """ - Generator -> Shuffle -> Batch + Feature: MindData Profiling Manager + Description: Test MindData profiling simple pipeline (Generator -> Shuffle -> Batch) + Expectation: Runs successfully """ - source = [(np.array([x]),) for x in range(1024)] data1 = ds.GeneratorDataset(source, ["data"]) data1 = data1.shuffle(64) @@ -161,11 +162,15 @@ class TestMinddataProfilingManager: def test_profiling_complex_pipeline(self, tmp_path): """ + Feature: MindData Profiling Manager + Description: Test MindData profiling complex pipeline: + Generator -> Map -> -> Zip TFReader -> Shuffle -> - """ + Expectation: Runs successfully + """ source = [(np.array([x]),) for x in range(1024)] data1 = ds.GeneratorDataset(source, ["gen"]) data1 = data1.map(operations=[(lambda x: x + 1)], input_columns=["gen"]) @@ -207,12 +212,15 @@ class TestMinddataProfilingManager: def test_profiling_inline_ops_pipeline1(self, tmp_path): """ - Test pipeline with inline ops: Concat and EpochCtrl + Feature: MindData Profiling Manager + Description: Test MindData profiling pipeline with inline ops 
(Concat and EpochCtrl): + Generator -> Concat -> EpochCtrl Generator -> - """ + Expectation: Runs successfully + """ # In source1 dataset: Number of rows is 3; its values are 0, 1, 2 def source1(): for i in range(3): @@ -267,10 +275,11 @@ class TestMinddataProfilingManager: def test_profiling_inline_ops_pipeline2(self, tmp_path): """ - Test pipeline with many inline ops - Generator -> Rename -> Skip -> Repeat -> Take + Feature: MindData Profiling Manager + Description: Test MindData profiling pipeline with many inline ops + (Generator -> Rename -> Skip -> Repeat -> Take) + Expectation: Runs successfully """ - # In source1 dataset: Number of rows is 10; its values are 0, 1, 2, 3, 4, 5 ... 9 def source1(): for i in range(10): @@ -314,7 +323,9 @@ class TestMinddataProfilingManager: def test_profiling_sampling_interval(self, tmp_path): """ - Test non-default monitor sampling interval + Feature: MindData Profiling Manager + Description: Test non-default monitor sampling interval + Expectation: Runs successfully """ interval_origin = ds.config.get_monitor_sampling_interval() @@ -349,10 +360,11 @@ class TestMinddataProfilingManager: def test_profiling_basic_pipeline(self, tmp_path): """ - Test with this basic pipeline - Generator -> Map -> Batch -> Repeat -> EpochCtrl + Feature: MindData Profiling Manager + Description: Test MindData profiling pipeline with basic pipeline + (Generator -> Map -> Batch -> Repeat -> EpochCtrl) + Expectation: Runs successfully """ - def source1(): for i in range(8000): yield (np.array([i]),) @@ -402,10 +414,11 @@ class TestMinddataProfilingManager: def test_profiling_cifar10_pipeline(self, tmp_path): """ - Test with this common pipeline with Cifar10 - Cifar10 -> Map -> Map -> Batch -> Repeat + Feature: MindData Profiling Manager + Description: Test MindData profiling with common pipeline with Cifar10 + (Cifar10 -> Map -> Map -> Batch -> Repeat) + Expectation: Runs successfully """ - # Create this common pipeline # Cifar10 -> Map -> Map -> 
Batch -> Repeat DATA_DIR_10 = "../data/dataset/testCifar10Data" @@ -455,12 +468,13 @@ class TestMinddataProfilingManager: def test_profiling_seq_pipelines_epochctrl3(self, tmp_path): """ - Test with these 2 sequential pipelines: - 1) Generator -> Batch -> EpochCtrl - 2) Generator -> Batch - Note: This is a simplification of the user scenario to use the same pipeline for training and then evaluation. + Feature: MindData Profiling Manager + Description: Test MindData profiling with these 2 sequential pipelines + 1) Generator -> Batch -> EpochCtrl + 2) Generator -> Batch + Note: This is a simplification of the user scenario to use the same pipeline for train and then eval + Expectation: Runs successfully """ - source = [(np.array([x]),) for x in range(64)] data1 = ds.GeneratorDataset(source, ["data"]) data1 = data1.batch(32) @@ -510,11 +524,12 @@ class TestMinddataProfilingManager: def test_profiling_seq_pipelines_epochctrl2(self, tmp_path): """ - Test with these 2 sequential pipelines: - 1) Generator -> Batch - 2) Generator -> Batch -> EpochCtrl + Feature: MindData Profiling Manager + Description: Test MindData profiling with these 2 sequential pipelines + 1) Generator -> Batch + 2) Generator -> Batch -> EpochCtrl + Expectation: Runs successfully """ - source = [(np.array([x]),) for x in range(64)] data2 = ds.GeneratorDataset(source, ["data"]) data2 = data2.batch(16) @@ -564,11 +579,12 @@ class TestMinddataProfilingManager: def test_profiling_seq_pipelines_repeat(self, tmp_path): """ - Test with these 2 sequential pipelines: - 1) Generator -> Batch - 2) Generator -> Batch -> Repeat + Feature: MindData Profiling Manager + Description: Test MindData profiling with these 2 sequential pipelines + 1) Generator -> Batch + 2) Generator -> Batch -> Repeat + Expectation: Runs successfully """ - source = [(np.array([x]),) for x in range(64)] data2 = ds.GeneratorDataset(source, ["data"]) data2 = data2.batch(16) diff --git a/tests/ut/python/dataset/test_profiling_startstop.py 
b/tests/ut/python/dataset/test_profiling_startstop.py index 3a0f138eb81..682d4667b57 100644 --- a/tests/ut/python/dataset/test_profiling_startstop.py +++ b/tests/ut/python/dataset/test_profiling_startstop.py @@ -1,4 +1,4 @@ -# Copyright 2021 Huawei Technologies Co., Ltd +# Copyright 2021-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -95,7 +95,10 @@ class TestMindDataProfilingStartStop: def test_profiling_early_stop(self, tmp_path): """ - Test MindData Profiling with Early Stop; profile for some iterations and then stop profiling + Feature: MindData Profiling Manager + Description: Test MindData profiling with early stop; profile for some iterations and then + stop profiling + Expectation: Runs successfully """ def source1(): for i in range(8000): @@ -138,9 +141,10 @@ class TestMindDataProfilingStartStop: def test_profiling_delayed_start(self, tmp_path): """ - Test MindData Profiling with Delayed Start; profile for subset of iterations + Feature: MindData Profiling Manager + Description: Test MindData profiling with delayed start; profile for subset of iterations + Expectation: Runs successfully """ - def source1(): for i in range(8000): yield (np.array([i]),) @@ -181,9 +185,10 @@ class TestMindDataProfilingStartStop: def test_profiling_multiple_start_stop(self, tmp_path): """ - Test MindData Profiling with Delayed Start and Multiple Start-Stop Sequences + Feature: MindData Profiling Manager + Description: Test MindData profiling with delayed start and multiple start-stop sequences + Expectation: Runs successfully """ - def source1(): for i in range(8000): yield (np.array([i]),) @@ -233,7 +238,9 @@ class TestMindDataProfilingStartStop: def test_profiling_start_start(self): """ - Test MindData Profiling with Start followed by Start - user error scenario + Feature: MindData Profiling Manager + Description: Test MindData profiling with start 
followed by start + Expectation: Error is raised as expected """ # Initialize MindData profiling manager self.md_profiler.init() @@ -252,7 +259,9 @@ class TestMindDataProfilingStartStop: def test_profiling_stop_stop(self, tmp_path): """ - Test MindData Profiling with Stop followed by Stop - user warning scenario + Feature: MindData Profiling Manager + Description: Test MindData profiling with stop followed by stop + Expectation: Warning is produced """ # Initialize MindData profiling manager self.md_profiler.init() @@ -270,7 +279,9 @@ class TestMindDataProfilingStartStop: def test_profiling_stop_nostart(self): """ - Test MindData Profiling with Stop not without prior Start - user error scenario + Feature: MindData Profiling Manager + Description: Test MindData profiling with stop not without prior start + Expectation: Error is raised as expected """ # Initialize MindData profiling manager self.md_profiler.init() diff --git a/tests/ut/python/dataset/test_project.py b/tests/ut/python/dataset/test_project.py index cf5a2728c62..bdac7581a10 100644 --- a/tests/ut/python/dataset/test_project.py +++ b/tests/ut/python/dataset/test_project.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,6 +26,11 @@ GENERATE_GOLDEN = False def test_case_project_single_column(): + """ + Feature: Project op + Description: Test Project op on a single column + Expectation: Output is equal to the expected output + """ columns = ["col_sint32"] parameters = {"params": {'columns': columns}} @@ -37,6 +42,11 @@ def test_case_project_single_column(): def test_case_project_multiple_columns_in_order(): + """ + Feature: Project op + Description: Test Project op on multiple columns in order + Expectation: Output is equal to the expected output + """ columns = ["col_sint16", "col_float", "col_2d"] parameters = {"params": {'columns': columns}} @@ -48,6 +58,11 @@ def test_case_project_multiple_columns_in_order(): def test_case_project_multiple_columns_out_of_order(): + """ + Feature: Project op + Description: Test Project op on multiple columns out of order + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -59,6 +74,11 @@ def test_case_project_multiple_columns_out_of_order(): def test_case_project_map(): + """ + Feature: Project op + Description: Test Project op followed by a Map op + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -73,6 +93,11 @@ def test_case_project_map(): def test_case_map_project(): + """ + Feature: Project op + Description: Test Project op after a Map op + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -88,6 +113,11 @@ def test_case_map_project(): def test_case_project_between_maps(): + """ + Feature: Project op + Description: Test Project op between Map ops (Map -> Project -> Map) + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -112,6 
+142,11 @@ def test_case_project_between_maps(): def test_case_project_repeat(): + """ + Feature: Project op + Description: Test Project op followed by Repeat op + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -126,6 +161,11 @@ def test_case_project_repeat(): def test_case_repeat_project(): + """ + Feature: Project op + Description: Test Project op after a Repeat op + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -141,6 +181,11 @@ def test_case_repeat_project(): def test_case_map_project_map_project(): + """ + Feature: Project op + Description: Test Map -> Project -> Map -> Project + Expectation: Output is equal to the expected output + """ columns = ["col_3d", "col_sint64", "col_2d"] parameters = {"params": {'columns': columns}} @@ -160,8 +205,11 @@ def test_case_map_project_map_project(): def test_column_order(): - """test the output dict has maintained an insertion order.""" - + """ + Feature: Project op + Description: Test Project op where the output dict should maintain the insertion order + Expectation: Output is equal to the expected output + """ def gen_3_cols(num): for i in range(num): yield (np.array([i * 3]), np.array([i * 3 + 1]), np.array([i * 3 + 2])) diff --git a/tests/ut/python/dataset/test_pyfunc.py b/tests/ut/python/dataset/test_pyfunc.py index 4679bf9a93e..134b2345ae0 100644 --- a/tests/ut/python/dataset/test_pyfunc.py +++ b/tests/ut/python/dataset/test_pyfunc.py @@ -27,7 +27,9 @@ GENERATE_GOLDEN = False def test_case_0(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test 1-1 PyFunc : lambda x : x + x + Expectation: Output is equal to the expected output """ logger.info("Test 1-1 PyFunc : lambda x : x + x") @@ -46,7 +48,9 @@ def test_case_0(): def test_case_1(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: 
Test 1-n PyFunc : lambda x : (x, x + x) + Expectation: Output is equal to the expected output """ logger.info("Test 1-n PyFunc : lambda x : (x , x + x) ") @@ -69,7 +73,9 @@ def test_case_1(): def test_case_2(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test n-1 PyFunc : lambda x, y : x + y + Expectation: Output is equal to the expected output """ logger.info("Test n-1 PyFunc : lambda x, y : x + y ") @@ -91,7 +97,9 @@ def test_case_2(): def test_case_3(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test n-m PyFunc : lambda x, y : (x, x + 1, x + y) + Expectation: Output is equal to the expected output """ logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)") @@ -117,7 +125,9 @@ def test_case_3(): def test_case_4(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test parallel n-m PyFunc : lambda x, y : (x, x + 1, x + y) + Expectation: Output is equal to the expected output """ logger.info("Test Parallel n-m PyFunc : lambda x, y : (x , x + 1, x + y)") @@ -149,7 +159,9 @@ def func_5(x): def test_case_5(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test 1-1 PyFunc : lambda x : np.ones(x.shape) + Expectation: Output is equal to the expected output """ logger.info("Test 1-1 PyFunc : lambda x: np.ones(x.shape)") @@ -166,7 +178,9 @@ def test_case_5(): def test_case_6(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test PyFunc Compose : (lambda x : x + x), (lambda x : x + x) + Expectation: Output is equal to the expected output """ logger.info("Test PyFunc Compose : (lambda x : x + x), (lambda x : x + x)") @@ -185,7 +199,9 @@ def test_case_6(): def test_case_7(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test 1-1 PyFunc with python_multiprocessing=True : lambda x : x + x + Expectation: Output is equal to the expected output """ logger.info("Test 1-1 PyFunc Multiprocess: lambda x : x + x") @@ -211,7 +227,9 @@ def test_case_7(): def test_case_8(): """ - Test PyFunc 
+ Feature: PyFunc in Map op + Description: Test n-m PyFunc with python_multiprocessing=True : lambda x, y : (x, x + 1, x + y) + Expectation: Output is equal to the expected output """ logger.info("Test Multiprocess n-m PyFunc : lambda x, y : (x , x + 1, x + y)") @@ -245,7 +263,9 @@ def test_case_8(): def test_case_9(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test multiple 1-1 PyFunc with python_multiprocessing=True : lambda x : x + x + Expectation: Output is equal to the expected output """ logger.info("Test multiple 1-1 PyFunc Multiprocess: lambda x : x + x") @@ -271,7 +291,9 @@ def test_case_9(): def test_case_10(): """ - Test PyFunc + Feature: PyFunc in Map op + Description: Test multiple map with python_multiprocessing=True : lambda x : x + x + Expectation: Output is equal to the expected output """ logger.info("Test multiple map with multiprocess: lambda x : x + x") @@ -299,7 +321,9 @@ def test_case_10(): def test_pyfunc_implicit_compose(): """ - Test Implicit Compose with pyfunc + Feature: PyFunc in Map op + Description: Test implicit compose with n-m PyFunc : lambda x, y : (x, x + 1, x + y) + Expectation: Output is equal to the expected output """ logger.info("Test n-m PyFunc : lambda x, y : (x , x + 1, x + y)") @@ -324,6 +348,11 @@ def test_pyfunc_implicit_compose(): def test_pyfunc_exception(): + """ + Feature: PyFunc in Map op + Description: Test PyFunc with exception in child pyfunc process + Expectation: Exception is received and test ends gracefully + """ logger.info("Test PyFunc Exception Throw: lambda x : raise Exception()") # Sometimes there are some ITERATORS left in ITERATORS_LIST when run all UTs together, @@ -371,6 +400,11 @@ def test_pyfunc_exception_multiprocess(): def test_func_with_yield_manifest_dataset_01(): + """ + Feature: PyFunc in Map op + Description: Test PyFunc mapping on ManifestDataset + Expectation: Error is raised as expected + """ def pass_func(_): for i in range(10): yield (np.array([i]),) diff --git 
a/tests/ut/python/dataset/test_rgb_hsv.py b/tests/ut/python/dataset/test_rgb_hsv.py index 269d48df99e..f9351d50879 100644 --- a/tests/ut/python/dataset/test_rgb_hsv.py +++ b/tests/ut/python/dataset/test_rgb_hsv.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -37,6 +37,11 @@ def generate_numpy_random_rgb(shape): def test_rgb_hsv_hwc(): + """ + Feature: RgbToHsv and HsvToRgb ops + Description: Test RgbToHsv and HsvToRgb utilities with an image in HWC format + Expectation: Output is equal to the expected output + """ rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32) rgb_np = rgb_flat.reshape((8, 8, 3)) hsv_base = np.array([ @@ -62,6 +67,11 @@ def test_rgb_hsv_hwc(): def test_rgb_hsv_batch_hwc(): + """ + Feature: RgbToHsv and HsvToRgb ops + Description: Test RgbToHsv and HsvToRgb utilities with a batch of images in HWC format + Expectation: Output is equal to the expected output + """ rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32) rgb_np = rgb_flat.reshape((4, 2, 8, 3)) hsv_base = np.array([ @@ -87,6 +97,11 @@ def test_rgb_hsv_batch_hwc(): def test_rgb_hsv_chw(): + """ + Feature: RgbToHsv and HsvToRgb ops + Description: Test RgbToHsv and HsvToRgb utilities with an image in CHW format + Expectation: Output is equal to the expected output + """ rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32) rgb_np = rgb_flat.reshape((3, 8, 8)) hsv_base = np.array([ @@ -110,6 +125,11 @@ def test_rgb_hsv_chw(): def test_rgb_hsv_batch_chw(): + """ + Feature: RgbToHsv and HsvToRgb ops + Description: Test RgbToHsv and HsvToRgb utilities with a batch of images in HWC format + Expectation: Output is equal to the expected output + """ rgb_flat = generate_numpy_random_rgb((64, 3)).astype(np.float32) rgb_imgs = rgb_flat.reshape((4, 3, 2, 8)) 
hsv_base_imgs = np.array([ @@ -132,6 +152,11 @@ def test_rgb_hsv_batch_chw(): def test_rgb_hsv_pipeline(): + """ + Feature: RgbToHsv and HsvToRgb ops + Description: Test RgbToHsv and HsvToRgb ops in data pipeline + Expectation: Output is equal to the expected output + """ # First dataset transforms1 = [ vision.Decode(True), diff --git a/tests/ut/python/dataset/test_sampler.py b/tests/ut/python/dataset/test_sampler.py index d1201decb74..79ef2719923 100644 --- a/tests/ut/python/dataset/test_sampler.py +++ b/tests/ut/python/dataset/test_sampler.py @@ -25,6 +25,11 @@ from util import dataset_equal # via the following lookup table (dict){(83554, 0): 0, (54214, 0): 1, (54214, 1): 2, (65512, 0): 3, (64631, 1): 4} def test_sequential_sampler(print_res=False): + """ + Feature: SequentialSampler op + Description: Test SequentialSampler op with various num_samples and num_repeats args combinations + Expectation: Output is equal to the expected output + """ manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} @@ -48,6 +53,11 @@ def test_sequential_sampler(print_res=False): def test_random_sampler(print_res=False): + """ + Feature: RandomSampler op + Description: Test RandomSampler with various replacement, num_samples, and num_repeats args combinations + Expectation: Output is equal to the expected output + """ ds.config.set_seed(1234) manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} @@ -72,6 +82,11 @@ def test_random_sampler(print_res=False): def test_random_sampler_multi_iter(print_res=False): + """ + Feature: RandomSampler op + Description: Test RandomSampler with multiple iteration based on num_repeats + Expectation: Output is equal to the expected output + """ manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" map_ = {(172876, 0): 0, (54214, 0): 
1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} @@ -93,12 +108,22 @@ def test_random_sampler_multi_iter(print_res=False): def test_sampler_py_api(): + """ + Feature: Sampler op + Description: Test add_child op of a Sampler op to a Sampler op + Expectation: Runs successfully + """ sampler = ds.SequentialSampler().parse() sampler1 = ds.RandomSampler().parse() sampler1.add_child(sampler) def test_python_sampler(): + """ + Feature: Python Sampler op + Description: Test Python Sampler op with and without inheritance + Expectation: Output is equal to the expected output + """ manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} @@ -162,6 +187,11 @@ def test_python_sampler(): def test_sequential_sampler2(): + """ + Feature: SequentialSampler op + Description: Test SequentialSampler op with various start_index and num_samples args combinations + Expectation: Output is equal to the expected output + """ manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" map_ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} @@ -188,6 +218,11 @@ def test_sequential_sampler2(): def test_subset_sampler(): + """ + Feature: SubsetSampler op + Description: Test SubsetSampler op with various indices and num_samples args combinations including invalid ones + Expectation: Output is equal to the expected output when input is valid, otherwise exception is raised + """ def test_config(indices, num_samples=None, exception_msg=None): def pipeline(): sampler = ds.SubsetSampler(indices, num_samples) @@ -245,6 +280,11 @@ def test_subset_sampler(): def test_sampler_chain(): + """ + Feature: Chained Sampler + Description: ManifestDataset with sampler chain; add SequentialSampler as a child for DistributedSampler + Expectation: Correct error is raised as expected + """ manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" map_ = {(172876, 0): 
0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} @@ -279,6 +319,12 @@ def test_sampler_chain(): def test_add_sampler_invalid_input(): + """ + Feature: Sampler op + Description: Test use_sampler op when the arg is not an instance of a sample and + another separate case when num_samples and sampler are specified at the same time in dataset arg + Expectation: Correct error is raised as expected + """ manifest_file = "../data/dataset/testManifestData/test5trainimgs.json" _ = {(172876, 0): 0, (54214, 0): 1, (54214, 1): 2, (173673, 0): 3, (64631, 1): 4} data1 = ds.ManifestDataset(manifest_file) @@ -298,12 +344,22 @@ def test_add_sampler_invalid_input(): def test_distributed_sampler_invalid_offset(): + """ + Feature: DistributedSampler op + Description: Test DistributedSampler op when offset is more than num_shards + Expectation: Error is raised as expected + """ with pytest.raises(RuntimeError) as info: _ = ds.DistributedSampler(num_shards=4, shard_id=0, shuffle=False, num_samples=None, offset=5).parse() assert "DistributedSampler: offset must be no more than num_shards(4)" in str(info.value) def test_sampler_list(): + """ + Feature: Sampler op + Description: Test various sampler args (int and not int) in ImageFolderDataset + Expectation: Output is equal to the expected output when sampler has data type int, otherwise exception is raised + """ data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=[1, 3, 5]) data21 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(2).skip(1) data22 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(4).skip(3) diff --git a/tests/ut/python/dataset/test_slice_op.py b/tests/ut/python/dataset/test_slice_op.py index 8a82c749524..792a027e176 100644 --- a/tests/ut/python/dataset/test_slice_op.py +++ b/tests/ut/python/dataset/test_slice_op.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed 
under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,18 +33,33 @@ def slice_compare(array, indexing, expected_array): def test_slice_all(): + """ + Feature: Slice op + Description: Test Slice op for whole array (using None, ellipsis, and boolean for Slice op arg) + Expectation: Output is equal to the expected output + """ slice_compare([1, 2, 3, 4, 5], None, [1, 2, 3, 4, 5]) slice_compare([1, 2, 3, 4, 5], ..., [1, 2, 3, 4, 5]) slice_compare([1, 2, 3, 4, 5], True, [1, 2, 3, 4, 5]) def test_slice_single_index(): + """ + Feature: Slice op + Description: Test Slice op with a single index + Expectation: Output is equal to the expected output + """ slice_compare([1, 2, 3, 4, 5], 0, [1]) slice_compare([1, 2, 3, 4, 5], -3, [3]) slice_compare([1, 2, 3, 4, 5], [0], [1]) def test_slice_indices_multidim(): + """ + Feature: Slice op + Description: Test Slice op using a multi dimension arg + Expectation: Output is equal to the expected output + """ slice_compare([[1, 2, 3, 4, 5]], [[0], [0]], 1) slice_compare([[1, 2, 3, 4, 5]], [[0], [0, 3]], [[1, 4]]) slice_compare([[1, 2, 3, 4, 5]], [0], [[1, 2, 3, 4, 5]]) @@ -52,6 +67,11 @@ def test_slice_indices_multidim(): def test_slice_list_index(): + """ + Feature: Slice op + Description: Test Slice op using list of indices as the arg + Expectation: Output is equal to the expected output + """ slice_compare([1, 2, 3, 4, 5], [0, 1, 4], [1, 2, 5]) slice_compare([1, 2, 3, 4, 5], [4, 1, 0], [5, 2, 1]) slice_compare([1, 2, 3, 4, 5], [-1, 1, 0], [5, 2, 1]) @@ -60,12 +80,22 @@ def test_slice_list_index(): def test_slice_index_and_slice(): + """ + Feature: Slice op + Description: Test Slice op where the arg is a list containing slice op + Expectation: Output is equal to the expected output + """ slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), [4]], [[5]]) slice_compare([[1, 2, 3, 4, 5]], [[0], slice(0, 2)], [[1, 2]]) slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [[1], 
slice(2, 4, 1)], [[7, 8]]) def test_slice_slice_obj_1s(): + """ + Feature: Slice op + Description: Test Slice op where the arg consists of slice op with 1 object + Expectation: Output is equal to the expected output + """ slice_compare([1, 2, 3, 4, 5], slice(1), [1]) slice_compare([1, 2, 3, 4, 5], slice(4), [1, 2, 3, 4]) slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(2), slice(2)], [[1, 2], [5, 6]]) @@ -73,6 +103,11 @@ def test_slice_slice_obj_1s(): def test_slice_slice_obj_2s(): + """ + Feature: Slice op + Description: Test Slice op where the arg consists of slice op with 2 objects + Expectation: Output is equal to the expected output + """ slice_compare([1, 2, 3, 4, 5], slice(0, 2), [1, 2]) slice_compare([1, 2, 3, 4, 5], slice(2, 4), [3, 4]) slice_compare([[1, 2, 3, 4], [5, 6, 7, 8]], [slice(0, 2), slice(1, 2)], [[2], [6]]) @@ -80,6 +115,12 @@ def test_slice_slice_obj_2s(): def test_slice_slice_obj_2s_multidim(): + """ + Feature: Slice op + Description: Test Slice using multi dimension array and Slice op has multi dimension + arg that consists of slice with 2 objects in the arg + Expectation: Output is equal to the expected output + """ slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1)], [[1, 2, 3, 4, 5]]) slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), slice(4)], [[1, 2, 3, 4]]) slice_compare([[1, 2, 3, 4, 5]], [slice(0, 1), slice(0, 3)], [[1, 2, 3]]) @@ -89,7 +130,9 @@ def test_slice_slice_obj_2s_multidim(): def test_slice_slice_obj_3s(): """ - Test passing in all parameters to the slice objects + Feature: Slice op + Description: Test Slice op where the arg consists of slice op with 3 objects + Expectation: Output is equal to the expected output """ slice_compare([1, 2, 3, 4, 5], slice(0, 2, 1), [1, 2]) slice_compare([1, 2, 3, 4, 5], slice(0, 4, 1), [1, 2, 3, 4]) @@ -109,6 +152,11 @@ def test_slice_slice_obj_3s(): def test_slice_obj_3s_double(): + """ + Feature: Slice op + Description: Test Slice op where the arg consists of slice op with 3 objects using an 
array of doubles + Expectation: Output is equal to the expected output + """ slice_compare([1., 2., 3., 4., 5.], slice(0, 2, 1), [1., 2.]) slice_compare([1., 2., 3., 4., 5.], slice(0, 4, 1), [1., 2., 3., 4.]) slice_compare([1., 2., 3., 4., 5.], slice(0, 5, 2), [1., 3., 5.]) @@ -120,7 +168,9 @@ def test_slice_obj_3s_double(): def test_out_of_bounds_slicing(): """ - Test passing indices outside of the input to the slice objects + Feature: Slice op + Description: Test Slice op with indices outside of the input to the arg + Expectation: Output is equal to the expected output """ slice_compare([1, 2, 3, 4, 5], slice(-15, -1), [1, 2, 3, 4]) slice_compare([1, 2, 3, 4, 5], slice(-15, 15), [1, 2, 3, 4, 5]) @@ -129,7 +179,9 @@ def test_out_of_bounds_slicing(): def test_slice_multiple_rows(): """ - Test passing in multiple rows + Feature: Slice op + Description: Test Slice op with multiple rows + Expectation: Output is equal to the expected output """ dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]] exp_dataset = [[], [4, 5], [2], [2, 3, 4]] @@ -147,7 +199,9 @@ def test_slice_multiple_rows(): def test_slice_none_and_ellipsis(): """ - Test passing None and Ellipsis to Slice + Feature: Slice op + Description: Test Slice op by passing None and Ellipsis in the arg + Expectation: Output is equal to the expected output """ dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]] exp_dataset = [[1], [3, 4, 5], [1, 2], [1, 2, 3, 4, 5, 6, 7]] @@ -168,6 +222,11 @@ def test_slice_none_and_ellipsis(): def test_slice_obj_neg(): + """ + Feature: Slice op + Description: Test Slice op with indices outside of the input (negative int) to the arg + Expectation: Output is equal to the expected output + """ slice_compare([1, 2, 3, 4, 5], slice(-1, -5, -1), [5, 4, 3, 2]) slice_compare([1, 2, 3, 4, 5], slice(-1), [1, 2, 3, 4]) slice_compare([1, 2, 3, 4, 5], slice(-2), [1, 2, 3]) @@ -177,11 +236,21 @@ def test_slice_obj_neg(): def test_slice_all_str(): + """ + Feature: Slice op + 
Description: Test Slice op for whole array of strings (using None and ellipsis for the arg) + Expectation: Output is equal to the expected output + """ slice_compare([b"1", b"2", b"3", b"4", b"5"], None, [b"1", b"2", b"3", b"4", b"5"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], ..., [b"1", b"2", b"3", b"4", b"5"]) def test_slice_single_index_str(): + """ + Feature: Slice op + Description: Test Slice op with a single index for array of strings + Expectation: Output is equal to the expected output + """ slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1], [b"1", b"2"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1], [b"1", b"2"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], [4], [b"5"]) @@ -190,11 +259,21 @@ def test_slice_single_index_str(): def test_slice_indexes_multidim_str(): + """ + Feature: Slice op + Description: Test Slice op for array of strings using a multi dimensional arg + Expectation: Output is equal to the expected output + """ slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], 0], [[b"1"]]) slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], [0, 1]], [[b"1", b"2"]]) def test_slice_list_index_str(): + """ + Feature: Slice op + Description: Test Slice op for array of strings with list of indices as the arg + Expectation: Output is equal to the expected output + """ slice_compare([b"1", b"2", b"3", b"4", b"5"], [0, 1, 4], [b"1", b"2", b"5"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], [4, 1, 0], [b"5", b"2", b"1"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], [3, 3, 3], [b"4", b"4", b"4"]) @@ -202,6 +281,11 @@ def test_slice_list_index_str(): # test str index object here def test_slice_index_and_slice_str(): + """ + Feature: Slice op + Description: Test Slice op for array of strings where the arg is a list containing slice op + Expectation: Output is equal to the expected output + """ slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), 4], [[b"5"]]) slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [[0], slice(0, 2)], [[b"1", 
b"2"]]) slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], [[1], slice(2, 4, 1)], @@ -209,6 +293,11 @@ def test_slice_index_and_slice_str(): def test_slice_slice_obj_1s_str(): + """ + Feature: Slice op + Description: Test Slice op for array of strings where the arg consists of slice op with 1 object + Expectation: Output is equal to the expected output + """ slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(1), [b"1"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(4), [b"1", b"2", b"3", b"4"]) slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], @@ -217,6 +306,11 @@ def test_slice_slice_obj_1s_str(): def test_slice_slice_obj_2s_str(): + """ + Feature: Slice op + Description: Test Slice op for array of strings where the arg consists of slice op with 2 objects + Expectation: Output is equal to the expected output + """ slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 2), [b"1", b"2"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(2, 4), [b"3", b"4"]) slice_compare([[b"1", b"2", b"3", b"4"], [b"5", b"6", b"7", b"8"]], @@ -224,6 +318,12 @@ def test_slice_slice_obj_2s_str(): def test_slice_slice_obj_2s_multidim_str(): + """ + Feature: Slice op + Description: Test Slice using multi dimension array of strings and Slice op has multi dimension + arg that consists of slice with 2 objects in the arg + Expectation: Output is equal to the expected output + """ slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1)], [[b"1", b"2", b"3", b"4", b"5"]]) slice_compare([[b"1", b"2", b"3", b"4", b"5"]], [slice(0, 1), slice(4)], [[b"1", b"2", b"3", b"4"]]) @@ -236,7 +336,9 @@ def test_slice_slice_obj_2s_multidim_str(): def test_slice_slice_obj_3s_str(): """ - Test passing in all parameters to the slice objects + Feature: Slice op + Description: Test Slice op for array of strings where the arg consists of slice op with 3 objects + Expectation: Output is equal to the expected output """ slice_compare([b"1", b"2", b"3", b"4", 
b"5"], slice(0, 2, 1), [b"1", b"2"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0, 4, 1), [b"1", b"2", b"3", b"4"]) @@ -260,6 +362,11 @@ def test_slice_slice_obj_3s_str(): def test_slice_obj_neg_str(): + """ + Feature: Slice op + Description: Test Slice op for array of strings with indices outside of the input (negative int) to the arg + Expectation: Output is equal to the expected output + """ slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, -1), [b"5", b"4", b"3", b"2"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1), [b"1", b"2", b"3", b"4"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-2), [b"1", b"2", b"3"]) @@ -270,7 +377,9 @@ def test_slice_obj_neg_str(): def test_out_of_bounds_slicing_str(): """ - Test passing indices outside of the input to the slice objects + Feature: Slice op + Description: Test Slice op for array of strings with indices outside of the input to the arg + Expectation: Output is equal to the expected output """ slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-15, -1), [b"1", b"2", b"3", b"4"]) slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-15, 15), [b"1", b"2", b"3", b"4", b"5"]) @@ -286,7 +395,9 @@ def test_out_of_bounds_slicing_str(): def test_slice_exceptions(): """ - Test passing in invalid parameters + Feature: Slice op + Description: Test Slice op with invalid parameters + Expectation: Correct error is raised as expected """ with pytest.raises(RuntimeError) as info: slice_compare([b"1", b"2", b"3", b"4", b"5"], [5], [b"1", b"2", b"3", b"4", b"5"]) diff --git a/tests/ut/python/dataset/test_slice_patches.py b/tests/ut/python/dataset/test_slice_patches.py index c3d14ba2940..4f53757ddfc 100644 --- a/tests/ut/python/dataset/test_slice_patches.py +++ b/tests/ut/python/dataset/test_slice_patches.py @@ -1,4 +1,4 @@ -# Copyright 2021 Huawei Technologies Co., Ltd +# Copyright 2021-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not 
use this file except in compliance with the License. @@ -31,35 +31,45 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" def test_slice_patches_01(plot=False): """ - slice rgb image(100, 200) to 4 patches + Feature: SlicePatches op + Description: Test SlicePatches op on RGB image(100, 200) to 4 patches + Expectation: Output is equal to the expected output """ slice_to_patches([100, 200], 2, 2, True, plot=plot) def test_slice_patches_02(plot=False): """ - no op + Feature: SlicePatches op + Description: Test SlicePatches op on RGB image(100, 200) to 1 patch (no operation being applied) + Expectation: Output is equal to the expected output """ slice_to_patches([100, 200], 1, 1, True, plot=plot) def test_slice_patches_03(plot=False): """ - slice rgb image(99, 199) to 4 patches in pad mode + Feature: SlicePatches op + Description: Test SlicePatches op on RGB image(99, 199) to 4 patches in pad mode + Expectation: Output is equal to the expected output """ slice_to_patches([99, 199], 2, 2, True, plot=plot) def test_slice_patches_04(plot=False): """ - slice rgb image(99, 199) to 4 patches in drop mode + Feature: SlicePatches op + Description: Test SlicePatches op on RGB image(99, 199) to 4 patches in drop mode + Expectation: Output is equal to the expected output """ slice_to_patches([99, 199], 2, 2, False, plot=plot) def test_slice_patches_05(plot=False): """ - slice rgb image(99, 199) to 4 patches in pad mode + Feature: SlicePatches op + Description: Test SlicePatches op on RGB image(99, 199) to 4 patches in pad mode with fill_value=255 + Expectation: Output is equal to the expected output """ slice_to_patches([99, 199], 2, 2, True, 255, plot=plot) @@ -113,7 +123,9 @@ def slice_to_patches(ori_size, num_h, num_w, pad_or_drop, fill_value=0, plot=Fal def test_slice_patches_exception_01(): """ - Test SlicePatches with invalid parameters + Feature: SlicePatches op + Description: Test SlicePatches op with invalid parameters + Expectation: Correct error 
is raised as expected """ logger.info("test_Slice_Patches_exception") try: @@ -141,6 +153,11 @@ def test_slice_patches_exception_01(): assert "Input fill_value is not within" in str(e) def test_slice_patches_06(): + """ + Feature: SlicePatches op + Description: Test SlicePatches op on random RGB image(158, 126, 1) to 16 patches + Expectation: Output's shape is equal to the expected output's shape + """ image = np.random.randint(0, 255, (158, 126, 1)).astype(np.int32) slice_patches_op = vision.SlicePatches(2, 8) patches = slice_patches_op(image) @@ -148,6 +165,11 @@ def test_slice_patches_06(): assert patches[0].shape == (79, 16, 1) def test_slice_patches_07(): + """ + Feature: SlicePatches op + Description: Test SlicePatches op on random RGB image(158, 126) to 16 patches + Expectation: Output's shape is equal to the expected output's shape + """ image = np.random.randint(0, 255, (158, 126)).astype(np.int32) slice_patches_op = vision.SlicePatches(2, 8) patches = slice_patches_op(image) @@ -155,6 +177,11 @@ def test_slice_patches_07(): assert patches[0].shape == (79, 16) def test_slice_patches_08(): + """ + Feature: SlicePatches op + Description: Test SlicePatches op on random RGB image(1, 56, 82, 256) to 4 patches + Expectation: Output's shape is equal to the expected output's shape + """ np_data = np.random.randint(0, 255, (1, 56, 82, 256)).astype(np.uint8) dataset = ds.NumpySlicesDataset(np_data, column_names=["image"]) slice_patches_op = vision.SlicePatches(2, 2) @@ -166,6 +193,11 @@ def test_slice_patches_08(): assert patch_shape == (28, 41, 256) def test_slice_patches_09(): + """ + Feature: SlicePatches op + Description: Test SlicePatches op on random RGB image(56, 82, 256) to 12 patches with pad mode + Expectation: Output's shape is equal to the expected output's shape + """ image = np.random.randint(0, 255, (56, 82, 256)).astype(np.uint8) slice_patches_op = vision.SlicePatches(4, 3, mode.SliceMode.PAD) patches = slice_patches_op(image) @@ -173,12 +205,22 @@ 
def test_slice_patches_09(): assert patches[0].shape == (14, 28, 256) def skip_test_slice_patches_10(): + """ + Feature: SlicePatches op + Description: Test SlicePatches op on random RGB image(7000, 7000, 255) to 130 patches with drop mode + Expectation: Output's shape is equal to the expected output's shape + """ image = np.random.randint(0, 255, (7000, 7000, 255)).astype(np.uint8) slice_patches_op = vision.SlicePatches(10, 13, mode.SliceMode.DROP) patches = slice_patches_op(image) assert patches[0].shape == (700, 538, 255) def skip_test_slice_patches_11(): + """ + Feature: SlicePatches op + Description: Test SlicePatches op on random RGB image(1, 7000, 7000, 256) to 130 patches with drop mode + Expectation: Output's shape is equal to the expected output's shape + """ np_data = np.random.randint(0, 255, (1, 7000, 7000, 256)).astype(np.uint8) dataset = ds.NumpySlicesDataset(np_data, column_names=["image"]) slice_patches_op = vision.SlicePatches(10, 13, mode.SliceMode.DROP) diff --git a/tests/ut/python/dataset/test_split.py b/tests/ut/python/dataset/test_split.py index 3d09685b31b..3af74538d37 100644 --- a/tests/ut/python/dataset/test_split.py +++ b/tests/ut/python/dataset/test_split.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -74,6 +74,12 @@ def split_with_invalid_inputs(d): def test_unmappable_invalid_input(): + """ + Feature: Split op + Description: Test split op using unmappable dataset (TextFileDataset) + with various invalid inputs and applying split op on sharded dataset + Expectation: Correct error is raised as expected + """ d = ds.TextFileDataset(text_file_dataset_path) split_with_invalid_inputs(d) @@ -84,6 +90,12 @@ def test_unmappable_invalid_input(): def test_unmappable_split(): + """ + Feature: Split op + Description: Test split op using unmappable dataset (TextFileDataset) + with absolute rows, exact percentages, and fuzzy percentages as input + Expectation: Output is equal to the expected output + """ original_num_parallel_workers = config_get_set_num_parallel_workers(4) d = ds.TextFileDataset(text_file_dataset_path, shuffle=False) @@ -133,6 +145,11 @@ def test_unmappable_split(): def test_unmappable_randomize_deterministic(): + """ + Feature: Split op + Description: Test split op using unmappable dataset (TextFileDataset) with randomization + Expectation: Output is equal to the expected output + """ original_num_parallel_workers = config_get_set_num_parallel_workers(4) # the labels outputted by ShuffleOp for seed 53 is [0, 2, 1, 4, 3] @@ -159,6 +176,11 @@ def test_unmappable_randomize_deterministic(): def test_unmappable_randomize_repeatable(): + """ + Feature: Split op + Description: Test split op using unmappable dataset (TextFileDataset) with randomization followed by repeat op + Expectation: Output is equal to the expected output + """ original_num_parallel_workers = config_get_set_num_parallel_workers(4) # the labels outputted by ShuffleOp for seed 53 is [0, 2, 1, 4, 3] @@ -188,6 +210,11 @@ def test_unmappable_randomize_repeatable(): def test_unmappable_get_dataset_size(): + """ + Feature: Split op + Description: Test split op using unmappable dataset (TextFileDataset) followed by get_dataset_size + Expectation: Output is equal to the expected output + """ d = 
ds.TextFileDataset(text_file_dataset_path, shuffle=False) s1, s2 = d.split([0.8, 0.2]) @@ -197,6 +224,12 @@ def test_unmappable_get_dataset_size(): def test_unmappable_multi_split(): + """ + Feature: Split op + Description: Test split op using unmappable dataset (TextFileDataset) + with randomization followed by deterministic split or another randomized split + Expectation: Output is equal to the expected output + """ original_num_parallel_workers = config_get_set_num_parallel_workers(4) # the labels outputted by ShuffleOp for seed 53 is [0, 2, 1, 4, 3] @@ -268,6 +301,12 @@ def test_unmappable_multi_split(): def test_mappable_invalid_input(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) with invalid inputs and + applying split op on sharded dataset + Expectation: Error is raised as expected + """ d = ds.ManifestDataset(manifest_file) split_with_invalid_inputs(d) @@ -278,6 +317,12 @@ def test_mappable_invalid_input(): def test_mappable_split_general(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) + with absolute rows, exact percentages, and fuzzy percentages + Expectation: Output is equal to the expected output + """ d = ds.ManifestDataset(manifest_file, shuffle=False) d = d.take(5) @@ -286,11 +331,11 @@ def test_mappable_split_general(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1, 2, 3] assert s2_output == [4] @@ -300,11 +345,11 @@ def test_mappable_split_general(): s1_output = [] for item 
in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1, 2, 3] assert s2_output == [4] @@ -314,17 +359,23 @@ def test_mappable_split_general(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1] assert s2_output == [2, 3, 4] def test_mappable_split_optimized(): + """ + Feature: Split op + Description: Test optimized split op using mappable dataset (ManifestDataset) + with absolute rows, exact percentages, and fuzzy percentages + Expectation: Output is equal to the expected output + """ d = ds.ManifestDataset(manifest_file, shuffle=False) # absolute rows @@ -332,11 +383,11 @@ def test_mappable_split_optimized(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + 
s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1, 2, 3] assert s2_output == [4] @@ -346,11 +397,11 @@ def test_mappable_split_optimized(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1, 2, 3] assert s2_output == [4] @@ -360,17 +411,22 @@ def test_mappable_split_optimized(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1] assert s2_output == [2, 3, 4] def test_mappable_randomize_deterministic(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) with randomization + Expectation: Output is equal to the expected output + """ # the labels outputted by ManifestDataset for seed 53 is [0, 1, 3, 4, 2] ds.config.set_seed(53) @@ -380,11 +436,11 @@ def test_mappable_randomize_deterministic(): for _ in range(10): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + 
s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) # note no overlap assert s1_output == [0, 1, 3, 4] @@ -392,6 +448,11 @@ def test_mappable_randomize_deterministic(): def test_mappable_randomize_repeatable(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) followed by repeat op + Expectation: Output is equal to the expected output + """ # the labels outputted by ManifestDataset for seed 53 is [0, 1, 3, 4, 2] ds.config.set_seed(53) @@ -404,11 +465,11 @@ def test_mappable_randomize_repeatable(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) # note no overlap assert s1_output == [0, 1, 3, 4] * num_epochs @@ -416,6 +477,11 @@ def test_mappable_randomize_repeatable(): def test_mappable_sharding(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) followed by sharding the dataset after split + Expectation: Output is equal to the expected output + """ # set arbitrary seed for repeatability for shard after split # the labels outputted by ManifestDataset for seed 53 is [0, 1, 3, 4, 2] ds.config.set_seed(53) @@ -443,12 +509,12 @@ def test_mappable_sharding(): # shard 0 s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - 
s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) # shard 1 d2s1_output = [] for item in d2s1.create_dict_iterator(num_epochs=1, output_numpy=True): - d2s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + d2s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) rows_per_shard_per_epoch = 2 assert len(s1_output) == rows_per_shard_per_epoch * num_epochs @@ -469,17 +535,22 @@ def test_mappable_sharding(): # test other split s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) d2s2_output = [] for item in d2s2.create_dict_iterator(num_epochs=1, output_numpy=True): - d2s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + d2s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s2_output == [2] assert d2s2_output == [2] def test_mappable_get_dataset_size(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) followed by get_dataset_size + Expectation: Output is equal to the expected output + """ d = ds.ManifestDataset(manifest_file, shuffle=False) s1, s2 = d.split([4, 1]) @@ -489,6 +560,12 @@ def test_mappable_get_dataset_size(): def test_mappable_multi_split(): + """ + Feature: Split op + Description: Test randomized split op using mappable dataset (ManifestDataset) followed by + another split op with and without randomization + Expectation: Output is equal to the expected output + """ # the labels outputted by ManifestDataset for seed 53 is [0, 1, 3, 4, 2] ds.config.set_seed(53) @@ -499,7 +576,7 @@ def test_mappable_multi_split(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, 
output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == s1_correct_output # no randomize in second split @@ -507,15 +584,15 @@ def test_mappable_multi_split(): s1s1_output = [] for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s1s2_output = [] for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s1s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s1s3_output = [] for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True): - s1s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1s3_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1s1_output == [s1_correct_output[0]] assert s1s2_output == [s1_correct_output[1], s1_correct_output[2]] @@ -523,7 +600,7 @@ def test_mappable_multi_split(): s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s2_output == [2] # randomize in second split @@ -534,15 +611,15 @@ def test_mappable_multi_split(): s1s1_output = [] for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s1s2_output = [] for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True): - 
s1s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s1s3_output = [] for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True): - s1s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1s3_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1s1_output == [s1_correct_output[random_sampler_ids[0]]] assert s1s2_output == [s1_correct_output[random_sampler_ids[1]], s1_correct_output[random_sampler_ids[2]]] @@ -550,11 +627,16 @@ def test_mappable_multi_split(): s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s2_output == [2] def test_rounding(): + """ + Feature: Split op + Description: Test split op using mappable dataset (ManifestDataset) with under rounding and over rounding arg + Expectation: Output is equal to the expected output + """ d = ds.ManifestDataset(manifest_file, shuffle=False) # under rounding @@ -562,11 +644,11 @@ def test_rounding(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0, 1, 2] assert s2_output == [3, 4] @@ -576,15 +658,15 @@ def test_rounding(): s1_output = [] for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): - 
s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s1_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s2_output = [] for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): - s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s2_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) s3_output = [] for item in s3.create_dict_iterator(num_epochs=1, output_numpy=True): - s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) + s3_output.append(manifest_map.get((item["image"].shape[0], item["label"].item()))) assert s1_output == [0] assert s2_output == [1, 2] diff --git a/tests/ut/python/dataset/test_tensor_string.py b/tests/ut/python/dataset/test_tensor_string.py index 7b98f8f527f..69b2dbddb3d 100644 --- a/tests/ut/python/dataset/test_tensor_string.py +++ b/tests/ut/python/dataset/test_tensor_string.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -21,6 +21,11 @@ from mindspore.dataset.text import to_str, to_bytes def test_basic(): + """ + Feature: Tensor + Description: Test basic Tensor op on NumPy dataset with strings + Expectation: Output is equal to the expected output + """ x = np.array([["ab", "cde", "121"], ["x", "km", "789"]], dtype='S') n = cde.Tensor(x) arr = n.as_array() @@ -40,6 +45,11 @@ def compare(strings, dtype='S'): def test_generator(): + """ + Feature: Tensor + Description: Test string tensor with various valid inputs using GeneratorDataset + Expectation: Output is equal to the expected output + """ compare(["ab"]) compare(["", ""]) compare([""]) @@ -72,6 +82,11 @@ chinese = np.array(["今天天气太好了我们一起去外面玩吧", def test_batching_strings(): + """ + Feature: Tensor + Description: Test applying Batch op to string tensor using GeneratorDataset + Expectation: Output is equal to the expected output + """ def gen(): for row in chinese: yield (np.array(row),) @@ -84,6 +99,11 @@ def test_batching_strings(): def test_map(): + """ + Feature: Tensor + Description: Test applying Map op split to string tensor using GeneratorDataset + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array(["ab cde 121"], dtype='S'),) @@ -101,6 +121,11 @@ def test_map(): def test_map2(): + """ + Feature: Tensor + Description: Test applying Map op upper to string tensor using GeneratorDataset + Expectation: Output is equal to the expected output + """ def gen(): yield (np.array(["ab cde 121"], dtype='S'),) @@ -117,6 +142,11 @@ def test_map2(): def test_tfrecord1(): + """ + Feature: Tensor + Description: Test string tensor using TFRecordDataset with created schema using "string" type + Expectation: Output is equal to the expected output + """ s = ds.Schema() s.add_column("line", "string", []) s.add_column("words", "string", [-1]) @@ -134,6 +164,11 @@ def test_tfrecord1(): def test_tfrecord2(): + """ + Feature: Tensor + Description: Test string tensor using TFRecordDataset with schema from a file + 
Expectation: Output is equal to the expected output + """ data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema='../data/dataset/testTextTFRecord/datasetSchema.json') for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): @@ -146,6 +181,11 @@ def test_tfrecord2(): def test_tfrecord3(): + """ + Feature: Tensor + Description: Test string tensor using TFRecordDataset with created schema using mstype.string type + Expectation: Output is equal to the expected output + """ s = ds.Schema() s.add_column("line", mstype.string, []) s.add_column("words", mstype.string, [-1, 2]) @@ -184,6 +224,11 @@ def create_text_mindrecord(): def test_mindrecord(): + """ + Feature: Tensor + Description: Test string tensor using MindDataset + Expectation: Output is equal to the expected output + """ data = ds.MindDataset("../data/dataset/testTextMindRecord/test.mindrecord", shuffle=False) for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): @@ -228,6 +273,11 @@ def gen_var_cols_2d(num): def test_batch_padding_01(): + """ + Feature: Batch Padding + Description: Test batch padding where input_shape=[x] and output_shape=[y] in which y > x + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], b"-2"), "col1d": ([2], b"-1")}) data1 = data1.repeat(2) @@ -238,6 +288,12 @@ def test_batch_padding_01(): def test_batch_padding_02(): + """ + Feature: Batch Padding + Description: Test batch padding where padding in one dimension and truncate in the other, in which + input_shape=[x1,x2] and output_shape=[y1,y2] and y1 > x1 and y2 < x2 + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], 
"")}) data1 = data1.repeat(2) @@ -247,6 +303,11 @@ def test_batch_padding_02(): def test_batch_padding_03(): + """ + Feature: Batch Padding + Description: Test batch padding using automatic padding for a specific column + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_var_col(4)), ["col"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, "PAD_VALUE")}) # pad automatically data1 = data1.repeat(2) @@ -260,6 +321,11 @@ def test_batch_padding_03(): def test_batch_padding_04(): + """ + Feature: Batch Padding + Description: Test batch padding using default setting for all columns + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.repeat(2) @@ -269,6 +335,11 @@ def test_batch_padding_04(): def test_batch_padding_05(): + """ + Feature: Batch Padding + Description: Test batch padding where None is in different places + Expectation: Output is equal to the expected output + """ data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = data1.batch(batch_size=3, drop_remainder=False, pad_info={"col2": ([2, None], "-2"), "col1": (None, "-1")}) # pad automatically diff --git a/tests/ut/python/dataset/test_to_number_op.py b/tests/ut/python/dataset/test_to_number_op.py index 6a351cd5892..3134c7791d0 100644 --- a/tests/ut/python/dataset/test_to_number_op.py +++ b/tests/ut/python/dataset/test_to_number_op.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -35,7 +35,9 @@ def string_dataset_generator(strings): def test_to_number_eager(): """ - Test ToNumber op is callable + Feature: ToNumber op + Description: Test ToNumber op in eager mode with valid and invalid tensor input + Expectation: Output is equal to the expected output for valid tensor and error is raised otherwise """ input_strings = [["1", "2", "3"], ["4", "5", "6"]] op = text.ToNumber(mstype.int8) @@ -59,6 +61,11 @@ def test_to_number_eager(): def test_to_number_typical_case_integral(): + """ + Feature: ToNumber op + Description: Test ToNumber op with int data type + Expectation: Output is equal to the expected output + """ input_strings = [["-121", "14"], ["-2219", "7623"], ["-8162536", "162371864"], ["-1726483716", "98921728421"]] @@ -75,6 +82,11 @@ def test_to_number_typical_case_integral(): def test_to_number_typical_case_non_integral(): + """ + Feature: ToNumber op + Description: Test ToNumber op with float data type + Expectation: Output is equal to the expected output + """ input_strings = [["-1.1", "1.4"], ["-2219.321", "7623.453"], ["-816256.234282", "162371864.243243"]] epsilons = [0.001, 0.001, 0.0001, 0.0001, 0.0000001, 0.0000001] @@ -105,6 +117,11 @@ def out_of_bounds_error_message_check(dataset, np_type, value_to_cast): def test_to_number_out_of_bounds_integral(): + """ + Feature: ToNumber op + Description: Test ToNumber op with values that are out of bounds for int range + Expectation: Error is raised as expected + """ for np_type, ms_type in zip(np_integral_types, ms_integral_types): type_info = np.iinfo(np_type) input_strings = [str(type_info.max + 10)] @@ -119,6 +136,11 @@ def test_to_number_out_of_bounds_integral(): def test_to_number_out_of_bounds_non_integral(): + """ + Feature: ToNumber op + Description: Test ToNumber op with values that are out of bounds for float range + Expectation: Error is raised as expected + """ above_range = [str(np.finfo(np.float16).max * 10), str(np.finfo(np.float32).max * 10), "1.8e+308"] input_strings = 
[above_range[0]] @@ -179,6 +201,11 @@ def test_to_number_boundaries_integral(): + """ + Feature: ToNumber op + Description: Test ToNumber op with values that are exactly at the boundaries of the range of int + Expectation: Output is equal to the expected output + """ for np_type, ms_type in zip(np_integral_types, ms_integral_types): type_info = np.iinfo(np_type) input_strings = [str(type_info.max)] @@ -201,6 +228,11 @@ def test_to_number_invalid_input(): + """ + Feature: ToNumber op + Description: Test ToNumber op with invalid input string + Expectation: Error is raised as expected + """ input_strings = ["a8fa9ds8fa"] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(operations=text.ToNumber(mstype.int32), input_columns=["strings"]) @@ -212,6 +244,11 @@ def test_to_number_invalid_type(): + """ + Feature: ToNumber op + Description: Test ToNumber op to map into an invalid data type + Expectation: Error is raised as expected + """ with pytest.raises(TypeError) as info: dataset = ds.GeneratorDataset(string_dataset_generator(["a8fa9ds8fa"]), "strings") dataset = dataset.map(operations=text.ToNumber(mstype.bool_), input_columns=["strings"]) diff --git a/tests/ut/python/dataset/test_vad.py b/tests/ut/python/dataset/test_vad.py index 0cf4294562a..d070b28ca38 100644 --- a/tests/ut/python/dataset/test_vad.py +++ b/tests/ut/python/dataset/test_vad.py @@ -1,4 +1,4 @@ -# Copyright 2021 Huawei Technologies Co., Ltd +# Copyright 2021-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -45,9 +45,9 @@ def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): def test_vad_pipeline1(): """ - Feature: Vad - Description: test Vad cpp op in pipeline - Expectation: equal results from Mindspore and benchmark + Feature: Vad op + Description: Test Vad op in pipeline + Expectation: Equal results from Mindspore and benchmark """ # <1000> dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :], @@ -75,9 +75,9 @@ def test_vad_pipeline1(): def test_vad_pipeline2(): """ - Feature: Vad - Description: test Vad cpp op in pipeline - Expectation: equal results from Mindspore and benchmark + Feature: Vad op + Description: Test Vad op in pipeline + Expectation: Equal results from Mindspore and benchmark """ # <1, 1000> trigger level and time dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy") @@ -130,9 +130,9 @@ def test_vad_pipeline2(): def test_vad_pipeline3(): """ - Feature: Vad - Description: test Vad cpp op in pipeline - Expectation: equal results from Mindspore and benchmark + Feature: Vad op + Description: Test Vad op in pipeline + Expectation: Equal results from Mindspore and benchmark """ # <1, 1000> noise dataset = ds.NumpySlicesDataset(np.load(DATA_DIR + "single_channel.npy") @@ -200,9 +200,9 @@ def test_vad_pipeline3(): def test_vad_pipeline_invalid_param1(): """ - Feature: Vad - Description: test Vad with invalid input parameters - Expectation: throw ValueError or TypeError + Feature: Vad op + Description: Test Vad with invalid input parameters + Expectation: Throw ValueError or TypeError """ logger.info("test InverseMelScale op with default values") in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :] @@ -243,9 +243,9 @@ def test_vad_pipeline_invalid_param1(): def test_vad_pipeline_invalid_param2(): """ - Feature: Vad - Description: test Vad with invalid input parameters - Expectation: throw ValueError or TypeError + Feature: Vad op + Description: Test Vad with invalid 
input parameters + Expectation: Throw ValueError or TypeError """ logger.info("test InverseMelScale op with default values") in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :] @@ -283,9 +283,9 @@ def test_vad_pipeline_invalid_param3(): """ - Feature: Vad - Description: test Vad with invalid input parameters - Expectation: throw ValueError or TypeError + Feature: Vad op + Description: Test Vad with invalid input parameters + Expectation: Throw ValueError or TypeError """ logger.info("test InverseMelScale op with default values") in_data = np.load(DATA_DIR + "single_channel.npy")[np.newaxis, :] @@ -343,9 +343,9 @@ def test_vad_eager(): """ - Feature: Vad - Description: test Vad cpp op with eager mode - Expectation: equal results from Mindspore and benchmark + Feature: Vad op + Description: Test Vad op with eager mode + Expectation: Equal results from Mindspore and benchmark """ spectrogram = np.load(DATA_DIR + "single_channel.npy") out_ms = c_audio.Vad(sample_rate=600)(spectrogram) diff --git a/tests/ut/python/dataset/test_var_batch_map.py b/tests/ut/python/dataset/test_var_batch_map.py index e55b4888c4e..c34bad7fd4a 100644 --- a/tests/ut/python/dataset/test_var_batch_map.py +++ b/tests/ut/python/dataset/test_var_batch_map.py @@ -19,6 +19,13 @@ from mindspore import log as logger def test_batch_corner_cases(): + """ + Feature: Batch op + Description: Test batch variations using corner cases: + - where batch_size is greater than the entire epoch, with drop_remainder equal to both values + - where Batch op is done before Repeat op with different drop + Expectation: Output is equal to the expected output + """ def gen(num): for i in range(num): yield (np.array([i]),) @@ -192,6 +199,11 @@ def test_get_batchsize_on_callable_batchsize(): def test_basic_batch_map(): + """ + Feature: Batch op + Description: Test basic map Batch op with per_batch_map + Expectation: Output is equal to the
expected output + """ def check_res(arr1, arr2): for ind, _ in enumerate(arr1): if not np.array_equal(arr1[ind], np.array(arr2[ind])): @@ -225,6 +237,11 @@ def test_basic_batch_map(): def test_batch_multi_col_map(): + """ + Feature: Batch op + Description: Test map Batch op with multiple columns input with per_batch_map + Expectation: Output is equal to the expected output + """ def check_res(arr1, arr2): for ind, _ in enumerate(arr1): if not np.array_equal(arr1[ind], np.array(arr2[ind])): @@ -274,6 +291,11 @@ def test_batch_multi_col_map(): def test_var_batch_multi_col_map(): + """ + Feature: Batch op + Description: Test Batch op with a function arg for batch_size using multiple columns input with per_batch_map + Expectation: Output is equal to the expected output + """ def check_res(arr1, arr2): for ind, _ in enumerate(arr1): if not np.array_equal(arr1[ind], np.array(arr2[ind])): @@ -314,6 +336,11 @@ def test_var_batch_multi_col_map(): def test_var_batch_var_resize(): + """ + Feature: Batch op + Description: Test Batch op with a function arg for batch_size with resize as per_batch_map + Expectation: Output is equal to the expected output + """ # fake resize image according to its batch number, if it's 5-th batch, resize to (5^2, 5^2) = (25, 25) def np_psedo_resize(col, batchInfo): s = (batchInfo.get_batch_num() + 1) ** 2 @@ -332,6 +359,11 @@ def test_var_batch_var_resize(): def test_exception(): + """ + Feature: Batch op + Description: Test Batch op with bad batch size and bad map function + Expectation: Error is raised as expected + """ def gen(num): for i in range(num): yield (np.array([i]),) @@ -362,6 +394,11 @@ def test_exception(): def test_multi_col_map(): + """ + Feature: Batch op + Description: Test Batch op with multiple columns with various per_batch_map args with valid and invalid inputs + Expectation: Output is equal to the expected output for valid input and error is raised otherwise + """ def gen_2_cols(num): for i in range(1, 1 + num): yield 
(np.array([i]), np.array([i ** 2])) @@ -427,6 +464,11 @@ def test_multi_col_map(): def test_exceptions_2(): + """ + Feature: Batch op + Description: Test Batch op with invalid column name and invalid per_batch_map function argument + Expectation: Error is raised as expected + """ def gen(num): for i in range(num): yield (np.array([i]),) diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index ebf33735759..54e40452789 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -123,7 +123,9 @@ def test_vocab_exception(): def test_lookup_callable(): """ - Test lookup is callable + Feature: Python text.Vocab class + Description: Test Lookup with text.Vocab as the argument + Expectation: Output is equal to the expected output """ logger.info("test_lookup_callable") vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) @@ -133,6 +135,11 @@ def test_lookup_callable(): def test_from_list_tutorial(): + """ + Feature: Python text.Vocab class + Description: Test from_list() method from text.Vocab basic usage tutorial + Expectation: Output is equal to the expected output + """ vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["", ""], True) lookup = text.Lookup(vocab, "") data = ds.TextFileDataset(DATA_FILE, shuffle=False) @@ -145,6 +152,11 @@ def test_from_list_tutorial(): def test_from_file_tutorial(): + """ + Feature: Python text.Vocab class + Description: Test from_file() method from text.Vocab basic usage tutorial + Expectation: Output is equal to the expected output + """ vocab = text.Vocab.from_file(VOCAB_FILE, ",", None, ["", ""], True) lookup = text.Lookup(vocab) data = ds.TextFileDataset(DATA_FILE, shuffle=False) @@ -157,6 +169,11 @@ def test_from_file_tutorial(): def test_from_dict_tutorial(): + """ + Feature: Python text.Vocab class + Description: Test from_dict() method from text.Vocab basic usage tutorial + Expectation: Output is equal to the expected 
output + """ vocab = text.Vocab.from_dict({"home": 3, "behind": 2, "the": 4, "world": 5, "": 6}) lookup = text.Lookup(vocab, "") # any unknown token will be mapped to the id of data = ds.TextFileDataset(DATA_FILE, shuffle=False) @@ -169,6 +186,11 @@ def test_from_dict_tutorial(): def test_from_dict_exception(): + """ + Feature: Python text.Vocab class + Description: Test from_dict() method from text.Vocab with invalid input + Expectation: Error is raised as expected + """ try: vocab = text.Vocab.from_dict({"home": -1, "behind": 0}) if not vocab: @@ -178,6 +200,11 @@ def test_from_dict_exception(): def test_from_list(): + """ + Feature: Python text.Vocab class + Description: Test from_list() method from text.Vocab with various valid input cases and invalid input + Expectation: Output is equal to the expected output, except for invalid input cases where correct error is raised + """ def gen(texts): for word in texts.split(" "): yield (np.array(word, dtype='S'),) @@ -216,6 +243,11 @@ def test_from_list(): def test_from_list_lookup_empty_string(): + """ + Feature: Python text.Vocab class + Description: Test from_list() with and without empty string in the Lookup op where unknown_token=None + Expectation: Output is equal to the expected output when "" in Lookup op and error is raised otherwise + """ # "" is a valid word in vocab, which can be looked up by LookupOp vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["", ""], True) lookup = text.Lookup(vocab, "") @@ -241,6 +273,11 @@ def test_from_list_lookup_empty_string(): def test_from_file(): + """ + Feature: Python text.Vocab class + Description: Test from_file() method from text.Vocab with various valid and invalid special_tokens and vocab_size + Expectation: Output is equal to the expected output for valid parameters and error is raised otherwise + """ def gen(texts): for word in texts.split(" "): yield (np.array(word, dtype='S'),) @@ -272,6 +309,11 @@ def test_from_file(): def 
test_lookup_cast_type(): + """ + Feature: Python text.Vocab class + Description: Test Lookup op cast type with various valid and invalid data types + Expectation: Output is equal to the expected output for valid data types and error is raised otherwise + """ def gen(texts): for word in texts.split(" "): yield (np.array(word, dtype='S'),)