From d9aa18dfcae7495720bbece48155fee28e7b8dd9 Mon Sep 17 00:00:00 2001 From: liyong Date: Mon, 18 Oct 2021 17:55:28 +0800 Subject: [PATCH] refactor md ut --- tests/ut/python/dataset/test_minddataset.py | 143 ++++----- .../python/dataset/test_minddataset_padded.py | 54 ++-- .../dataset/test_minddataset_sampler.py | 85 ++++-- tests/ut/python/dataset/test_save_op.py | 204 ++++++++----- .../ut/python/dataset/test_serdes_dataset.py | 34 --- .../mindrecord/test_mindrecord_exception.py | 288 ++++++++++++------ 6 files changed, 478 insertions(+), 330 deletions(-) diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index c53408ca6a4..06a60b50684 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -31,20 +31,15 @@ from mindspore.dataset.vision import Inter from mindspore.mindrecord import FileWriter FILES_NUM = 4 -CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" -CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord" -CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord" CV_DIR_NAME = "../data/mindrecord/testImageNetData" -NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord" -OLD_NLP_FILE_NAME = "../data/mindrecord/testOldVersion/aclImdb.mindrecord" NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos" NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt" - @pytest.fixture def add_and_remove_cv_file(): """add/remove cv file""" - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -52,7 +47,7 @@ def add_and_remove_cv_file(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(CV_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = get_data(CV_DIR_NAME) cv_schema_json = {"id": {"type": "int32"}, "file_name": {"type": "string"}, @@ -77,7 +72,8 @@ def add_and_remove_cv_file(): @pytest.fixture def add_and_remove_nlp_file(): """add/remove nlp file""" - paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -85,7 +81,7 @@ def add_and_remove_nlp_file(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(NLP_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)] nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"}, "rating": {"type": "float32"}, @@ -117,7 +113,8 @@ def add_and_remove_nlp_file(): @pytest.fixture def add_and_remove_nlp_compress_file(): """add/remove nlp file""" - paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -125,7 +122,7 @@ def add_and_remove_nlp_compress_file(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(NLP_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = [] for row_id in range(16): data.append({ @@ -183,8 +180,9 @@ def 
test_nlp_compress_data(add_and_remove_nlp_compress_file): "array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1]) }) num_readers = 1 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data_set = ds.MindDataset( - NLP_FILE_NAME + "0", None, num_readers, shuffle=False) + file_name + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 16 num_iter = 0 for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): @@ -197,29 +195,10 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): assert num_iter == 16 -def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file): - """tutorial for nlp minderdataset.""" - num_readers = 1 - data_set = ds.MindDataset( - NLP_FILE_NAME + "0", None, num_readers, shuffle=False) - old_data_set = ds.MindDataset( - OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False) - assert old_data_set.get_dataset_size() == 16 - num_iter = 0 - for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True), - data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): - assert (item["array_a"] == x["array_a"]).all() - assert (item["array_b"] == x["array_b"]).all() - assert (item["array_c"] == x["array_c"]).all() - assert (item["array_d"] == x["array_d"]).all() - assert item["label"] == x["label"] - num_iter += 1 - assert num_iter == 16 - - def test_cv_minddataset_writer_tutorial(): """tutorial for cv dataset writer.""" - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -227,7 +206,7 @@ def test_cv_minddataset_writer_tutorial(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(CV_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = get_data(CV_DIR_NAME) cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, "data": {"type": "bytes"}} @@ -250,10 +229,11 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id) num_iter = 0 for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -272,10 +252,11 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, num_samples=1) @@ -297,10 +278,11 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = 
os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, num_samples=2) @@ -322,10 +304,11 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, expect): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, num_samples=3) @@ -346,8 +329,9 @@ def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5) + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5) assert data_set.get_dataset_size() == 5 num_iter = 0 @@ -366,9 +350,10 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c epoch1 = [] epoch2 = [] epoch3 = [] + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id) data_set = data_set.repeat(3) @@ -401,13 +386,14 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] num_shards = 3 epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id) data_set = data_set.repeat(3) @@ -436,13 +422,14 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] ds.config.set_seed(54321) epoch1 = [] epoch2 = [] epoch3 = [] - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) data_set = data_set.repeat(3) num_iter = 0 @@ -468,7 +455,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): epoch2_new_dataset = [] epoch3_new_dataset = [] - data_set2 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + data_set2 = ds.MindDataset(file_name + "0", columns_list, 
num_readers) data_set2 = data_set2.repeat(3) num_iter = 0 @@ -499,7 +486,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): epoch2_new_dataset2 = [] epoch3_new_dataset2 = [] - data_set3 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + data_set3 = ds.MindDataset(file_name + "0", columns_list, num_readers) data_set3 = data_set3.repeat(3) num_iter = 0 @@ -530,7 +517,8 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) assert data_set.get_dataset_size() == 10 repeat_num = 2 data_set = data_set.repeat(repeat_num) @@ -544,7 +532,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): "-------------- item[data]: {} ----------------------".format(item["data"])) num_iter += 1 assert num_iter == 20 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=4, shard_id=3) assert data_set.get_dataset_size() == 3 @@ -553,7 +541,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) decode_op = vision.Decode() data_set = data_set.map( input_columns=["data"], operations=decode_op, num_parallel_workers=2) @@ -584,7 +573,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): """tutorial for cv minddataset.""" columns_list = ["data", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) decode_op = vision.Decode() data_set = data_set.map( input_columns=["data"], operations=decode_op, num_parallel_workers=2) @@ -608,7 +598,8 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file): """issue 888 test.""" columns_list = ["data", "label"] num_readers = 2 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1) data_set = data_set.shuffle(2) data_set = data_set.repeat(9) num_iter = 0 @@ -621,7 +612,8 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset([CV_FILE_NAME + str(x) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)], columns_list, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 @@ -644,7 +636,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 
- data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset([file_name + "0"], columns_list, num_readers) assert data_set.get_dataset_size() < 10 num_iter = 0 for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -664,6 +657,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): """tutorial for cv minderdataset.""" + CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord" + CV2_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord" try: if os.path.exists(CV1_FILE_NAME): os.remove(CV1_FILE_NAME) @@ -696,7 +691,8 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): writer.commit() columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME], + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME], columns_list, num_readers) assert data_set.get_dataset_size() == 30 num_iter = 0 @@ -735,6 +731,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): + CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1" paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: @@ -756,7 +753,8 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(2)] + + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset([file_name + str(x) for x in range(2)] + [CV1_FILE_NAME + str(x) for x in range(2, 4)], columns_list, num_readers) assert data_set.get_dataset_size() < 20 @@ -789,7 +787,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -810,7 +809,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): """tutorial for nlp minderdataset.""" num_readers = 4 - data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", None, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -839,7 +839,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", 
columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) assert data_set.get_dataset_size() == 10 for _ in range(5): num_iter = 0 @@ -855,7 +856,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ """tutorial for cv minderdataset.""" columns_list = ["data", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) resize_height = 32 resize_width = 32 @@ -881,7 +883,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): """tutorial for cv minderdataset.""" - data_set = ds.MindDataset(CV_FILE_NAME + "0") + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0") assert data_set.get_dataset_size() == 10 num_iter = 0 for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -903,7 +906,8 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers) repeat_num = 2 data_set = data_set.repeat(repeat_num) num_iter = 0 @@ -1753,7 +1757,8 @@ def test_write_with_multi_array_and_MindDataset(): def test_numpy_generic(): - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -1761,7 +1766,7 @@ def test_numpy_generic(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(CV_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) cv_schema_json = {"label1": {"type": "int32"}, "label2": {"type": "int64"}, "label3": {"type": "float32"}, "label4": {"type": "float64"}} data = [] @@ -1777,7 +1782,7 @@ def test_numpy_generic(): writer.commit() num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False) + data_set = ds.MindDataset(file_name + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 10 idx = 0 for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -1799,7 +1804,7 @@ def test_numpy_generic(): def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(): - mindrecord_file_name = "test.mindrecord" + mindrecord_file_name = "test_write_with_float32_float64_float32_array_float64_array_and_MindDataset.mindrecord" try: data = [{"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32), "float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471, @@ -2570,7 +2575,8 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files): def test_field_is_null_numpy(): """add/remove nlp file""" - paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) + file_name = 
os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] for x in paths: if os.path.exists("{}".format(x)): @@ -2578,7 +2584,7 @@ def test_field_is_null_numpy(): if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(NLP_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = [] # field array_d is null for row_id in range(16): @@ -2607,7 +2613,7 @@ def test_field_is_null_numpy(): writer.write_raw_data(data) writer.commit() - data_set = ds.MindDataset(dataset_file=NLP_FILE_NAME + "0", + data_set = ds.MindDataset(dataset_file=file_name + "0", columns_list=["label", "array_a", "array_b", "array_d"], num_parallel_workers=2, shuffle=False) @@ -2639,8 +2645,9 @@ def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file): "array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1]) }) num_readers = 1 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data_set = ds.MindDataset( - NLP_FILE_NAME + "0", None, num_readers, shuffle=False) + file_name + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 16 # create_dict_iterator in for loop diff --git a/tests/ut/python/dataset/test_minddataset_padded.py b/tests/ut/python/dataset/test_minddataset_padded.py index 97daba392af..a268adf4eee 100644 --- a/tests/ut/python/dataset/test_minddataset_padded.py +++ b/tests/ut/python/dataset/test_minddataset_padded.py @@ -28,26 +28,22 @@ from mindspore import log as logger from mindspore.mindrecord import FileWriter FILES_NUM = 4 -CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" -CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord" -CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord" CV_DIR_NAME = "../data/mindrecord/testImageNetData" -NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord" NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos" NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt" - @pytest.fixture def add_and_remove_cv_file(): """add/remove cv file""" - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None os.remove("{}.db".format(x)) if os.path.exists( "{}.db".format(x)) else None - writer = FileWriter(CV_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = get_data(CV_DIR_NAME) cv_schema_json = {"id": {"type": "int32"}, "file_name": {"type": "string"}, @@ -72,7 +68,8 @@ def add_and_remove_cv_file(): @pytest.fixture def add_and_remove_nlp_file(): """add/remove nlp file""" - paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -80,7 +77,7 @@ def add_and_remove_nlp_file(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(NLP_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)] nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"}, "rating": {"type": "float32"}, @@ -118,7 +115,8 @@ def 
test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): padded_sample['label'] = -1 padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) assert data_set.get_dataset_size() == 15 num_iter = 0 num_padded_iter = 0 @@ -145,7 +143,8 @@ def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_ padded_sample['label'] = -1 padded_sample['file_name'] = 99999 num_readers = 4 - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5) assert data_set.get_dataset_size() == 15 num_iter = 0 num_padded_iter = 0 @@ -173,12 +172,13 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): padded_sample['label'] = -2 padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded, dataset_size): num_padded_iter = 0 num_iter = 0 for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -213,6 +213,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f padded_sample['label'] = -2 padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded, dataset_size): repeat_size = 5 @@ -224,7 +225,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f epoch3_shuffle_result = [] epoch4_shuffle_result = [] epoch5_shuffle_result = [] - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -285,10 +286,11 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv padded_sample['label'] = -2 padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -310,10 +312,11 @@ def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_a padded_sample['label'] = -2 padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, 
shard_id=partition_id, padded_sample=padded_sample, @@ -332,10 +335,11 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re padded_sample.pop('label', None) padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -356,10 +360,11 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c padded_sample['label'] = -2 padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, + data_set = ds.MindDataset(file_name + "0", None, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -380,10 +385,11 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv padded_sample = data[0] padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, + data_set = ds.MindDataset(file_name + "0", None, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample) @@ -403,10 +409,11 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov padded_sample = data[0] padded_sample['file_name'] = 'dummy.jpg' num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded): for partition_id in range(num_shards): - data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, + data_set = ds.MindDataset(file_name + "0", None, num_readers, num_shards=num_shards, shard_id=partition_id, num_padded=num_padded) @@ -429,12 +436,13 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): padded_sample['input_ids'] = np.array([-1, -1, -1, -1], dtype=np.int64) padded_sample['rating'] = 1.0 num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded, dataset_size): num_padded_iter = 0 num_iter = 0 for partition_id in range(num_shards): - data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -470,6 +478,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ padded_sample['rating'] = 1.0 num_readers = 4 repeat_size = 3 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded, dataset_size): num_padded_iter = 0 @@ -479,7 +488,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ epoch1_shuffle_result = [] epoch2_shuffle_result = [] epoch3_shuffle_result = [] - data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", 
columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, @@ -534,6 +543,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul padded_sample['rating'] = 1.0 num_readers = 4 repeat_size = 3 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] def partitions(num_shards, num_padded, dataset_size): num_padded_iter = 0 @@ -542,7 +552,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul epoch_result = [[["" for i in range(dataset_size)] for i in range(repeat_size)] for i in range(num_shards)] for partition_id in range(num_shards): - data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample, diff --git a/tests/ut/python/dataset/test_minddataset_sampler.py b/tests/ut/python/dataset/test_minddataset_sampler.py index 16abdac939c..a41c7795fc8 100644 --- a/tests/ut/python/dataset/test_minddataset_sampler.py +++ b/tests/ut/python/dataset/test_minddataset_sampler.py @@ -25,14 +25,13 @@ from mindspore.dataset.text import to_str from mindspore.mindrecord import FileWriter FILES_NUM = 4 -CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord" CV_DIR_NAME = "../data/mindrecord/testImageNetData" - @pytest.fixture def add_and_remove_cv_file(): """add/remove cv file""" - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] try: for x in paths: @@ -40,7 +39,7 @@ def add_and_remove_cv_file(): os.remove("{}".format(x)) if os.path.exists("{}.db".format(x)): os.remove("{}.db".format(x)) - writer = FileWriter(CV_FILE_NAME, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) data = get_data(CV_DIR_NAME, True) cv_schema_json = {"id": {"type": "int32"}, "file_name": {"type": "string"}, @@ -66,7 +65,8 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): """tutorial for cv minderdataset.""" num_readers = 4 sampler = ds.PKSampler(2) - data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", None, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 6 @@ -86,7 +86,8 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(2) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 6 @@ -108,7 +109,8 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(3, None, True) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 9 @@ -129,7 +131,8 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] 
num_readers = 4 sampler = ds.PKSampler(3, None, True, 'label', 5) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 5 @@ -150,7 +153,8 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(3, None, True, 'label', 10) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 9 @@ -171,7 +175,8 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(5, None, True) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 15 num_iter = 0 @@ -191,7 +196,8 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(5, None, True, 'label', 20) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 15 num_iter = 0 @@ -211,7 +217,8 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.PKSampler(5, None, True, 'label', 10) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 10 num_iter = 0 @@ -230,10 +237,11 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): """tutorial for cv minderdataset.""" columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] indices = [1, 2, 3, 5, 7] samplers = (ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)) for sampler in samplers: - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 @@ -255,9 +263,10 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 indices = [1, 2, 2, 5, 7, 9] + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) for sampler in samplers: - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 6 num_iter = 0 @@ -279,9 
+288,10 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 indices = [] + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) for sampler in samplers: - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 0 num_iter = 0 @@ -304,8 +314,9 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file num_readers = 4 indices = [1, 2, 4, 11, 13] samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] for sampler in samplers: - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 @@ -327,8 +338,9 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): num_readers = 4 indices = [1, 2, 4, -1, -2] samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices) + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] for sampler in samplers: - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 @@ -350,7 +362,8 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 sampler = ds.RandomSampler() - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 10 num_iter = 0 @@ -373,8 +386,9 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.RandomSampler() - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 10 ds1 = data_set.repeat(3) @@ -407,8 +421,9 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.RandomSampler(replacement=True, num_samples=5) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 @@ -428,8 +443,9 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = 
os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.RandomSampler(replacement=False, num_samples=2) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 2 num_iter = 0 @@ -449,8 +465,9 @@ def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_fil def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.RandomSampler(replacement=False, num_samples=20) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 10 num_iter = 0 @@ -471,8 +488,9 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.SequentialSampler(1, 4) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) assert data_set.get_dataset_size() == 4 num_iter = 0 @@ -495,8 +513,9 @@ def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.SequentialSampler(2, 10) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) dataset_size = data_set.get_dataset_size() assert dataset_size == 10 @@ -520,8 +539,9 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] sampler = ds.SequentialSampler(2, 20) - data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, + data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, sampler=sampler) dataset_size = data_set.get_dataset_size() assert dataset_size == 10 @@ -545,7 +565,8 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 - d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + d = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False) d1, d2 = d.split([8, 2], randomize=False) assert d.get_dataset_size() == 10 @@ -581,7 +602,8 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 - d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + d = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False) d1, d2 = d.split([0.8, 0.2], randomize=False) assert d.get_dataset_size() == 10 @@ -617,7 +639,8 @@ def 
test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 - d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + d = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False) d1, d2 = d.split([0.41, 0.59], randomize=False) assert d.get_dataset_size() == 10 @@ -652,7 +675,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): columns_list = ["data", "file_name", "label"] num_readers = 4 - d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + d = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False) # should set seed to avoid data overlap ds.config.set_seed(111) @@ -693,7 +717,8 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): data = get_data(CV_DIR_NAME, True) columns_list = ["data", "file_name", "label"] num_readers = 4 - d = ds.MindDataset(CV_FILE_NAME + "0", columns_list, + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + d = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False) # should set seed to avoid data overlap ds.config.set_seed(111) diff --git a/tests/ut/python/dataset/test_save_op.py b/tests/ut/python/dataset/test_save_op.py index 037460952b6..bc87cca64bb 100644 --- a/tests/ut/python/dataset/test_save_op.py +++ b/tests/ut/python/dataset/test_save_op.py @@ -23,38 +23,25 @@ import mindspore.dataset as ds from mindspore import log as logger from mindspore.mindrecord import FileWriter -TEMP_FILE = "../data/mindrecord/testMindDataSet/temp.mindrecord" -AUTO_FILE = "../data/mindrecord/testMindDataSet/auto.mindrecord" TFRECORD_FILES = "../data/mindrecord/testTFRecordData/dummy.tfrecord" FILES_NUM = 1 num_readers = 1 -@pytest.fixture(name="add_remove_file") -def fixture_remove(): +def remove_file(file_name): """add/remove cv file""" - if os.path.exists("{}".format(TEMP_FILE)): - os.remove("{}".format(TEMP_FILE)) - if os.path.exists("{}.db".format(TEMP_FILE)): - os.remove("{}.db".format(TEMP_FILE)) + if os.path.exists("{}".format(file_name)): + os.remove("{}".format(file_name)) + if os.path.exists("{}.db".format(file_name)): + os.remove("{}.db".format(file_name)) - if os.path.exists("{}".format(AUTO_FILE)): - os.remove("{}".format(AUTO_FILE)) - if os.path.exists("{}.db".format(AUTO_FILE)): - os.remove("{}.db".format(AUTO_FILE)) - yield "yield_cv_data" - if os.path.exists("{}".format(TEMP_FILE)): - os.remove("{}".format(TEMP_FILE)) - if os.path.exists("{}.db".format(TEMP_FILE)): - os.remove("{}.db".format(TEMP_FILE)) - - if os.path.exists("{}".format(AUTO_FILE)): - os.remove("{}".format(AUTO_FILE)) - if os.path.exists("{}.db".format(AUTO_FILE)): - os.remove("{}.db".format(AUTO_FILE)) - - -def test_case_00(add_remove_file): # only bin data +def test_case_00(): + """ + Feature: save op + Description: all bin data + Expectation: generated mindrecord file + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data = [{"image1": bytes("image1 bytes abc", encoding='UTF-8'), "image2": bytes("image1 bytes def", encoding='UTF-8'), "image3": bytes("image1 bytes ghi", encoding='UTF-8'), @@ -86,13 +73,16 @@ def test_case_00(add_remove_file): # only bin data "image3": {"type": "bytes"}, "image4": {"type": "bytes"}, 
"image5": {"type": "bytes"}} - writer = FileWriter(TEMP_FILE, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) writer.add_schema(schema, "schema") writer.write_raw_data(data) writer.commit() - d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False) - d1.save(AUTO_FILE, FILES_NUM) + file_name_auto = './' + file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + file_name_auto += '_auto' + d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False) + d1.save(file_name_auto, FILES_NUM) data_value_to_list = [] for item in data: @@ -104,7 +94,7 @@ def test_case_00(add_remove_file): # only bin data new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8) data_value_to_list.append(new_data) - d2 = ds.MindDataset(dataset_file=AUTO_FILE, + d2 = ds.MindDataset(dataset_file=file_name_auto, num_parallel_workers=num_readers, shuffle=False) assert d2.get_dataset_size() == 5 @@ -119,9 +109,12 @@ def test_case_00(add_remove_file): # only bin data assert item[field] == data_value_to_list[num_iter][field] num_iter += 1 assert num_iter == 5 + remove_file(file_name) + remove_file(file_name_auto) -def test_case_01(add_remove_file): # only raw data + file_name_auto = './' + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data = [{"file_name": "001.jpg", "label": 43}, {"file_name": "002.jpg", "label": 91}, {"file_name": "003.jpg", "label": 61}, @@ -132,13 +125,16 @@ def test_case_01(add_remove_file): # only raw data "label": {"type": "int32"} } - writer = FileWriter(TEMP_FILE, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) writer.add_schema(schema, "schema") writer.write_raw_data(data) writer.commit() - d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False) - d1.save(AUTO_FILE, FILES_NUM) + file_name_auto = './' + file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + file_name_auto += '_auto' + d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False) + d1.save(file_name_auto, FILES_NUM) data_value_to_list = [] for item in data: @@ -147,7 +143,7 @@ def test_case_01(add_remove_file): # only raw data new_data['label'] = np.asarray(list([item["label"]]), dtype=np.int32) data_value_to_list.append(new_data) - d2 = ds.MindDataset(dataset_file=AUTO_FILE, + d2 = ds.MindDataset(dataset_file=file_name_auto, num_parallel_workers=num_readers, shuffle=False) assert d2.get_dataset_size() == 6 @@ -163,9 +159,17 @@ def test_case_01(add_remove_file): # only raw data assert item[field] == data_value_to_list[num_iter][field] num_iter += 1 assert num_iter == 6 + remove_file(file_name) + remove_file(file_name_auto) -def test_case_02(add_remove_file): # muti-bytes +def test_case_02(): # muti-bytes + """ + Feature: save op + Description: multiple byte fields + Expectation: generated mindrecord file + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] data = [{"file_name": "001.jpg", "label": 43, "float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32), "float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471, @@ -258,13 +262,16 @@ def test_case_02(add_remove_file): # muti-bytes "label": {"type": "int32"}, "image4": {"type": "bytes"}, "image5": {"type": "bytes"}} - writer = FileWriter(TEMP_FILE, FILES_NUM) + writer = FileWriter(file_name, FILES_NUM) writer.add_schema(schema, "schema") writer.write_raw_data(data) writer.commit() - d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False) - 
-    d1.save(AUTO_FILE, FILES_NUM)
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
+    d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
+    d1.save(file_name_auto, FILES_NUM)
 
     data_value_to_list = []
     for item in data:
@@ -284,7 +291,7 @@ def test_case_02(add_remove_file):  # muti-bytes
         new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8)
         data_value_to_list.append(new_data)
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
     assert d2.get_dataset_size() == 6
@@ -303,6 +310,8 @@ def test_case_02(add_remove_file):  # muti-bytes
             assert item[field] == data_value_to_list[num_iter][field]
         num_iter += 1
     assert num_iter == 6
+    remove_file(file_name)
+    remove_file(file_name_auto)
 
 
 def generator_1d():
@@ -310,14 +319,21 @@ def generator_1d():
         yield (np.array([i]),)
 
 
-def test_case_03(add_remove_file):
-
+def test_case_03():
+    """
+    Feature: save op
+    Description: 1D numpy array
+    Expectation: generated mindrecord file
+    """
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
     # apply dataset operations
     d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
-    d1.save(AUTO_FILE)
+    d1.save(file_name_auto)
 
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
 
@@ -327,6 +343,7 @@ def test_case_03(add_remove_file):
         golden = np.array([i])
         np.testing.assert_array_equal(item["data"], golden)
         i = i + 1
+    remove_file(file_name_auto)
 
 
 def generator_with_type(t):
@@ -335,6 +352,9 @@ def generator_with_type(t):
 
 
 def type_tester(t):
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
     logger.info("Test with Type {}".format(t.__name__))
 
     # apply dataset operations
@@ -344,9 +364,9 @@ def type_tester(t):
 
     data1 = data1.repeat(3)
 
-    data1.save(AUTO_FILE)
+    data1.save(file_name_auto)
 
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
 
@@ -362,10 +382,7 @@ def type_tester(t):
             i = 0
             num_repeat += 1
     assert num_repeat == 3
-    if os.path.exists("{}".format(AUTO_FILE)):
-        os.remove("{}".format(AUTO_FILE))
-    if os.path.exists("{}.db".format(AUTO_FILE)):
-        os.remove("{}.db".format(AUTO_FILE))
+    remove_file(file_name_auto)
 
 
 def test_case_04():
@@ -377,20 +394,31 @@ def test_case_04():
         type_tester(t)
 
 
-def test_case_05(add_remove_file):
+def test_case_05():
+    """
+    Feature: save op
+    Description: Exception Test
+    Expectation: exception occurs
+    """
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
 
     with pytest.raises(Exception, match="num_files should between 0 and 1000."):
-        d1.save(AUTO_FILE, 0)
+        d1.save(file_name, 0)
 
 
-def test_case_06(add_remove_file):
-
+def test_case_06():
+    """
+    Feature: save op
+    Description: Exception Test
+    Expectation: exception occurs
+    """
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
 
     with pytest.raises(Exception, match="tfrecord dataset format is not supported."):
-        d1.save(AUTO_FILE, 1, "tfrecord")
+        d1.save(file_name, 1, "tfrecord")
 
 
 def cast_name(key):
@@ -405,16 +433,20 @@ def cast_name(key):
 
 
 def test_case_07():
-    if os.path.exists("{}".format(AUTO_FILE)):
-        os.remove("{}".format(AUTO_FILE))
-    if os.path.exists("{}.db".format(AUTO_FILE)):
-        os.remove("{}.db".format(AUTO_FILE))
+    """
+    Feature: save op
+    Description: save tfrecord files
+    Expectation: generated mindrecord file
+    """
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
     d1 = ds.TFRecordDataset(TFRECORD_FILES, shuffle=False)
     tf_data = []
     for x in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
         tf_data.append(x)
-    d1.save(AUTO_FILE, FILES_NUM)
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d1.save(file_name_auto, FILES_NUM)
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
     mr_data = []
@@ -429,11 +461,7 @@ def test_case_07():
                 assert v == mr_data[count][cast_name(k)]
         count += 1
     assert count == 10
-
-    if os.path.exists("{}".format(AUTO_FILE)):
-        os.remove("{}".format(AUTO_FILE))
-    if os.path.exists("{}.db".format(AUTO_FILE)):
-        os.remove("{}.db".format(AUTO_FILE))
+    remove_file(file_name_auto)
 
 
 def generator_dynamic_1d():
@@ -461,14 +489,21 @@ def generator_dynamic_2d_1():
         yield (np.arange(10).reshape([5, 2]),)
 
 
-def test_case_08(add_remove_file):
-
+def test_case_08():
+    """
+    Feature: save op
+    Description: save dynamic 1D numpy array
+    Expectation: generated mindrecord file
+    """
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
     # apply dataset operations
    d1 = ds.GeneratorDataset(generator_dynamic_1d, ["data"], shuffle=False)
-    d1.save(AUTO_FILE)
+    d1.save(file_name_auto)
 
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
 
@@ -481,16 +516,23 @@ def test_case_08(add_remove_file):
             golden = np.array(arr)
         np.testing.assert_array_equal(item["data"], golden)
         i = i + 1
+    remove_file(file_name_auto)
 
-
-def test_case_09(add_remove_file):
-
+def test_case_09():
+    """
+    Feature: save op
+    Description: save dynamic 2D numpy array
+    Expectation: generated mindrecord file
+    """
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
     # apply dataset operations
     d1 = ds.GeneratorDataset(generator_dynamic_2d_0, ["data"], shuffle=False)
-    d1.save(AUTO_FILE)
+    d1.save(file_name_auto)
 
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
 
@@ -502,13 +544,23 @@
         golden = np.arange(10).reshape([2, 5])
         np.testing.assert_array_equal(item["data"], golden)
         i = i + 1
+    remove_file(file_name_auto)
 
 
-def test_case_10(add_remove_file):
+def test_case_10():
+    """
+    Feature: save op
+    Description: save 2D Tensor of different shape
+    Expectation: exception occurs
+    """
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
     # apply dataset operations
     d1 = ds.GeneratorDataset(generator_dynamic_2d_1, ["data"], shuffle=False)
 
     with pytest.raises(Exception, match=
                        "Error: besides dimension 0, other dimension shape is different from the previous's"):
-        d1.save(AUTO_FILE)
+        d1.save(file_name_auto)
+    remove_file(file_name_auto)
diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py
index f3320fdb6fb..d8e3db5c1ae 100644
--- a/tests/ut/python/dataset/test_serdes_dataset.py
+++ b/tests/ut/python/dataset/test_serdes_dataset.py
@@ -22,7 +22,6 @@ import os
 import pytest
 import numpy as np
 
-from test_minddataset_sampler import add_and_remove_cv_file, get_data, CV_DIR_NAME, CV_FILE_NAME
 from util import config_get_set_num_parallel_workers, config_get_set_seed
 
 import mindspore.common.dtype as mstype
@@ -509,38 +508,6 @@ def delete_json_files():
         except IOError:
             logger.info("Error while deleting: {}".format(f))
 
-
-# Test save load minddataset
-def skip_test_minddataset(add_and_remove_cv_file=True):
-    """tutorial for cv minderdataset."""
-    columns_list = ["data", "file_name", "label"]
-    num_readers = 4
-    indices = [1, 2, 3, 5, 7]
-    sampler = ds.SubsetRandomSampler(indices)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
-                              sampler=sampler)
-
-    # Serializing into python dictionary
-    ds1_dict = ds.serialize(data_set)
-    # Serializing into json object
-    ds1_json = json.dumps(ds1_dict, sort_keys=True)
-
-    # Reconstruct dataset pipeline from its serialized form
-    data_set = ds.deserialize(input_dict=ds1_dict)
-    ds2_dict = ds.serialize(data_set)
-    # Serializing into json object
-    ds2_json = json.dumps(ds2_dict, sort_keys=True)
-
-    assert ds1_json == ds2_json
-
-    _ = get_data(CV_DIR_NAME)
-    assert data_set.get_dataset_size() == 5
-    num_iter = 0
-    for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
-        num_iter += 1
-    assert num_iter == 5
-
-
 if __name__ == '__main__':
     test_serdes_imagefolder_dataset()
     test_serdes_mnist_dataset()
@@ -555,4 +522,3 @@ if __name__ == '__main__':
     test_serdes_uniform_augment()
     skip_test_serdes_fill()
     test_serdes_exception()
-    skip_test_minddataset()
diff --git a/tests/ut/python/mindrecord/test_mindrecord_exception.py b/tests/ut/python/mindrecord/test_mindrecord_exception.py
index 628f674e77b..fc8aedbd5ee 100644
--- a/tests/ut/python/mindrecord/test_mindrecord_exception.py
+++ b/tests/ut/python/mindrecord/test_mindrecord_exception.py
@@ -23,8 +23,6 @@ from mindspore import log as logger
 from mindspore.mindrecord import FileWriter, FileReader, MindPage, SUCCESS
 from mindspore.mindrecord import ParamValueError, MRMGetMetaError
 
-CV_FILE_NAME = "./imagenet.mindrecord"
-NLP_FILE_NAME = "./aclImdb.mindrecord"
 FILES_NUM = 4
 
 def remove_one_file(x):
@@ -42,20 +40,6 @@ def remove_file(file_name):
         x = file_name + str(i) + ".db"
         remove_one_file(x)
 
-@pytest.fixture
-def fixture_cv_file():
-    """add/remove file"""
-    remove_file(CV_FILE_NAME)
-    yield "yield_fixture_data"
-    remove_file(CV_FILE_NAME)
-
-@pytest.fixture
-def fixture_nlp_file():
-    """add/remove file"""
-    remove_file(NLP_FILE_NAME)
-    yield "yield_fixture_data"
-    remove_file(NLP_FILE_NAME)
-
 def test_cv_file_writer_shard_num_none():
     """test cv file writer when shard num is None."""
     with pytest.raises(Exception, match="Shard num is illegal."):
@@ -71,29 +55,29 @@ def test_cv_file_writer_shard_num_str():
 def test_cv_page_reader_consumer_num_none():
     """test cv page reader when consumer number is None."""
     with pytest.raises(Exception, match="Consumer number is illegal."):
-        MindPage(CV_FILE_NAME + "0", None)
+        MindPage("dummy.mindrecord", None)
 
 def test_cv_page_reader_consumer_num_str():
     """test cv page reader when consumer number is string."""
     with pytest.raises(Exception, match="Consumer number is illegal."):
-        MindPage(CV_FILE_NAME + "0", "2")
+        MindPage("dummy.mindrecord", "2")
 
 def test_nlp_file_reader_consumer_num_none():
"""test nlp file reader when consumer number is None.""" with pytest.raises(Exception, match="Consumer number is illegal."): - FileReader(NLP_FILE_NAME + "0", None) + FileReader("dummy.mindrecord", None) def test_nlp_file_reader_consumer_num_str(): """test nlp file reader when consumer number is string.""" with pytest.raises(Exception, match="Consumer number is illegal."): - FileReader(NLP_FILE_NAME + "0", "4") + FileReader("dummy.mindrecord", "4") -def create_cv_mindrecord(files_num): - writer = FileWriter(CV_FILE_NAME, files_num) +def create_cv_mindrecord(files_num, file_name): + writer = FileWriter(file_name, files_num) data = get_data("../data/mindrecord/testImageNetData/") cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int64"}, "data": {"type": "bytes"}} @@ -104,139 +88,218 @@ def create_cv_mindrecord(files_num): def test_lack_partition_and_db(): - """test file reader when mindrecord file does not exist.""" + """ + Feature: FileReader + Description: test file reader when mindrecord file does not exist + Expectation: exception occur + """ with pytest.raises(RuntimeError) as err: reader = FileReader('dummy.mindrecord') reader.close() assert 'Unexpected error. Invalid file, path:' in str(err.value) -def test_lack_db(fixture_cv_file): - """test file reader when db file does not exist.""" - create_cv_mindrecord(1) - os.remove("{}.db".format(CV_FILE_NAME)) +def test_lack_db(): + """ + Feature: FileReader + Description: test file reader when db file does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(1, file_name) + os.remove("{}.db".format(file_name)) with pytest.raises(RuntimeError) as err: - reader = FileReader(CV_FILE_NAME) + reader = FileReader(file_name) reader.close() assert 'Unexpected error. Invalid database file, path:' in str(err.value) + remove_file(file_name) -def test_lack_some_partition_and_db(fixture_cv_file): - """test file reader when some partition and db do not exist.""" - create_cv_mindrecord(4) - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) +def test_lack_some_partition_and_db(): + """ + Feature: FileReader + Description: test file reader when some partition and db do not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] os.remove("{}".format(paths[3])) os.remove("{}.db".format(paths[3])) with pytest.raises(RuntimeError) as err: - reader = FileReader(CV_FILE_NAME + "0") + reader = FileReader(file_name + "0") reader.close() assert 'Unexpected error. 
Invalid file, path:' in str(err.value) + remove_file(file_name) -def test_lack_some_partition_first(fixture_cv_file): - """test file reader when first partition does not exist.""" - create_cv_mindrecord(4) - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) +def test_lack_some_partition_first(): + """ + Feature: FileReader + Description: test file reader when first partition does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] os.remove("{}".format(paths[0])) with pytest.raises(RuntimeError) as err: - reader = FileReader(CV_FILE_NAME + "0") + reader = FileReader(file_name + "0") reader.close() assert 'Unexpected error. Invalid file, path:' in str(err.value) + remove_file(file_name) -def test_lack_some_partition_middle(fixture_cv_file): - """test file reader when some partition does not exist.""" - create_cv_mindrecord(4) - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) +def test_lack_some_partition_middle(): + """ + Feature: FileReader + Description: test file reader when some partition does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] os.remove("{}".format(paths[1])) with pytest.raises(RuntimeError) as err: - reader = FileReader(CV_FILE_NAME + "0") + reader = FileReader(file_name + "0") reader.close() assert 'Unexpected error. Invalid file, path:' in str(err.value) + remove_file(file_name) -def test_lack_some_partition_last(fixture_cv_file): - """test file reader when last partition does not exist.""" - create_cv_mindrecord(4) - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) +def test_lack_some_partition_last(): + """ + Feature: FileReader + Description: test file reader when last partition does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] os.remove("{}".format(paths[3])) with pytest.raises(RuntimeError) as err: - reader = FileReader(CV_FILE_NAME + "0") + reader = FileReader(file_name + "0") reader.close() assert 'Unexpected error. Invalid file, path:' in str(err.value) + remove_file(file_name) -def test_mindpage_lack_some_partition(fixture_cv_file): - """test page reader when some partition does not exist.""" - create_cv_mindrecord(4) - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) +def test_mindpage_lack_some_partition(): + """ + Feature: MindPage + Description: test page reader when some partition does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] os.remove("{}".format(paths[0])) with pytest.raises(RuntimeError) as err: - MindPage(CV_FILE_NAME + "0") + MindPage(file_name + "0") assert 'Unexpected error. 
Invalid file, path:' in str(err.value) + remove_file(file_name) -def test_lack_some_db(fixture_cv_file): - """test file reader when some db does not exist.""" - create_cv_mindrecord(4) - paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0')) +def test_lack_some_db(): + """ + Feature: FileReader + Description: test file reader when some db does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)] os.remove("{}.db".format(paths[3])) with pytest.raises(RuntimeError) as err: - reader = FileReader(CV_FILE_NAME + "0") + reader = FileReader(file_name + "0") reader.close() assert 'Unexpected error. Invalid database file, path:' in str(err.value) - + remove_file(file_name) def test_invalid_mindrecord(): - """test file reader when the content of mindrecord is illegal.""" - with open(CV_FILE_NAME, 'w') as f: + """ + Feature: FileReader + Description: test file reader when the content of mindrecord is illegal + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + with open(file_name, 'w') as f: dummy = 's' * 100 f.write(dummy) with pytest.raises(RuntimeError) as err: - FileReader(CV_FILE_NAME) + FileReader(file_name) assert "Unexpected error. Invalid file content, incorrect file or file header" in str(err.value) - os.remove(CV_FILE_NAME) + remove_file(file_name) -def test_invalid_db(fixture_cv_file): - """test file reader when the content of db is illegal.""" - create_cv_mindrecord(1) - os.remove("imagenet.mindrecord.db") - with open('imagenet.mindrecord.db', 'w') as f: +def test_invalid_db(): + """ + Feature: FileReader + Description: test file reader when the content of db is illegal + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(1, file_name) + os.remove(file_name + ".db") + with open(file_name + ".db", 'w') as f: f.write('just for test') with pytest.raises(RuntimeError) as err: - FileReader('imagenet.mindrecord') + FileReader(file_name) assert "Unexpected error. Failed to execute sql [ SELECT NAME from SHARD_NAME; ], " in str(err.value) + remove_file(file_name) -def test_overwrite_invalid_mindrecord(fixture_cv_file): - """test file writer when overwrite invalid mindreocrd file.""" - with open(CV_FILE_NAME, 'w') as f: +def test_overwrite_invalid_mindrecord(): + """ + Feature: FileWriter + Description: test file writer when overwrite invalid mindreocrd file + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + with open(file_name, 'w') as f: f.write('just for test') with pytest.raises(RuntimeError) as err: - create_cv_mindrecord(1) + create_cv_mindrecord(1, file_name) assert 'Unexpected error. 
Invalid file, Mindrecord files already existed in path:' in str(err.value) + remove_file(file_name) -def test_overwrite_invalid_db(fixture_cv_file): - """test file writer when overwrite invalid db file.""" - with open('imagenet.mindrecord.db', 'w') as f: +def test_overwrite_invalid_db(): + """ + Feature: FileWriter + Description: test file writer when overwrite invalid db file + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + with open(file_name + '.db', 'w') as f: f.write('just for test') with pytest.raises(RuntimeError) as err: - create_cv_mindrecord(1) + create_cv_mindrecord(1, file_name) assert 'Unexpected error. Failed to write data to db.' in str(err.value) + remove_file(file_name) -def test_read_after_close(fixture_cv_file): - """test file reader when close read.""" - create_cv_mindrecord(1) - reader = FileReader(CV_FILE_NAME) +def test_read_after_close(): + """ + Feature: FileReader + Description: test file reader when close read + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(1, file_name) + reader = FileReader(file_name) reader.close() count = 0 for index, x in enumerate(reader.get_next()): count = count + 1 logger.info("#item{}: {}".format(index, x)) assert count == 0 + remove_file(file_name) -def test_file_read_after_read(fixture_cv_file): - """test file reader when finish read.""" - create_cv_mindrecord(1) - reader = FileReader(CV_FILE_NAME) +def test_file_read_after_read(): + """ + Feature: FileReader + Description: test file reader when finish read + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(1, file_name) + reader = FileReader(file_name) count = 0 for index, x in enumerate(reader.get_next()): assert len(x) == 3 @@ -249,25 +312,40 @@ def test_file_read_after_read(fixture_cv_file): cnt = cnt + 1 logger.info("#item{}: {}".format(index, x)) assert cnt == 0 + remove_file(file_name) def test_cv_file_writer_shard_num_greater_than_1000(): - """test cv file writer shard number greater than 1000.""" + """ + Feature: FileWriter + Description: test cv file writer shard number greater than 1000 + Expectation: exception occur + """ with pytest.raises(ParamValueError) as err: - FileWriter(CV_FILE_NAME, 1001) + FileWriter('dummy.mindrecord', 1001) assert 'Shard number should between' in str(err.value) def test_add_index_without_add_schema(): + """ + Feature: FileWriter + Description: test add index without adding schema + Expectation: exception occur + """ with pytest.raises(MRMGetMetaError) as err: - fw = FileWriter(CV_FILE_NAME) + fw = FileWriter('dummy.mindrecord') fw.add_index(["label"]) assert 'Failed to get meta info' in str(err.value) -def test_mindpage_pageno_pagesize_not_int(fixture_cv_file): - """test page reader when some partition does not exist.""" - create_cv_mindrecord(4) - reader = MindPage(CV_FILE_NAME + "0") +def test_mindpage_pageno_pagesize_not_int(): + """ + Feature: MindPage + Description: test page reader when some partition does not exist + Expectation: exception occur + """ + file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0] + create_cv_mindrecord(4, file_name) + reader = MindPage(file_name + "0") fields = reader.get_category_fields() assert fields == ['file_name', 'label'], \ 'failed on getting candidate category fields.' 
@@ -293,12 +371,18 @@ def test_mindpage_pageno_pagesize_not_int(fixture_cv_file):
     with pytest.raises(RuntimeError, match=r"Unexpected error. Invalid data, "
                        r"category_id: 99999 must be in the range \[0, 10\]."):
         reader.read_at_page_by_id(99999, 0, 1)
+    remove_file(file_name)
 
-def test_mindpage_filename_not_exist(fixture_cv_file):
-    """test page reader when some partition does not exist."""
-    create_cv_mindrecord(4)
-    reader = MindPage(CV_FILE_NAME + "0")
+def test_mindpage_filename_not_exist():
+    """
+    Feature: MindPage
+    Description: test page reader when file name does not exist
+    Expectation: exception occurs
+    """
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    create_cv_mindrecord(4, file_name)
+    reader = MindPage(file_name + "0")
     fields = reader.get_category_fields()
     assert fields == ['file_name', 'label'], \
         'failed on getting candidate category fields.'
@@ -319,11 +403,15 @@ def test_mindpage_filename_not_exist(fixture_cv_file):
     with pytest.raises(ParamValueError):
         reader.read_at_page_by_name(1, 0, 1)
 
-    _ = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
-         for x in range(FILES_NUM)]
+    remove_file(file_name)
 
 def test_invalid_schema():
-    mindrecord_file_name = "test.mindrecord"
+    """
+    Feature: FileWriter
+    Description: test invalid schema
+    Expectation: exception occurs
+    """
+    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     writer = FileWriter(mindrecord_file_name)
 
     # string => str
@@ -462,7 +550,7 @@ def test_invalid_schema():
         os.remove("{}.db".format(mindrecord_file_name))
 
 def test_write_with_invalid_data():
-    mindrecord_file_name = "test.mindrecord"
+    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     # field: file_name => filename
     with pytest.raises(RuntimeError, match="Unexpected error. Invalid data, schema count should be positive."):
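
A note on the idiom this patch applies throughout: instead of shared module-level constants, every refactored test derives its mindrecord file name from pytest's PYTEST_CURRENT_TEST environment variable, so reordered or concurrently run tests no longer clobber one another's files, and each test can remove exactly what it created. A minimal sketch of the naming step, with a hypothetical helper name (the patch inlines the expression rather than wrapping it):

import os

def file_name_from_current_test():
    # PYTEST_CURRENT_TEST has the form
    # "tests/ut/python/dataset/test_save_op.py::test_case_03 (call)".
    # split(':')[-1] keeps "test_case_03 (call)" (the "::" separator
    # produces empty fields), and split(' ')[0] drops the " (call)"
    # stage suffix, leaving a per-test unique name like "test_case_03".
    return os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]

Each test then writes its shards under this name and finishes with remove_file(file_name), which deletes the generated shard files and their .db companions, replacing the removed fixture_cv_file/fixture_nlp_file fixtures.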