add try to minddataset python tests to remove files on fail

delete unrelated file

change finally to except else

fix spacing alignment

fix indentation

fix indentation

fix indentation

add fix to new test case

add if main for pytest

fix spacing
tony_liu2 2020-07-17 14:06:14 -04:00
parent 859fe6bc41
commit 1f4251a440
5 changed files with 1306 additions and 1083 deletions
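Every hunk below applies the same pattern: an assertion that previously ran before an unconditional os.remove is wrapped in try/except/else, so the generated MindRecord artifacts are deleted whether the assertion passes or fails. A minimal sketch of that pattern, using a hypothetical file name rather than any path from these tests:

    import os

    def test_with_cleanup():
        file_name = "dummy_artifact.mindrecord"  # hypothetical name for illustration
        open(file_name, "w").close()             # stand-in for the writer under test
        num_iter = 0
        try:
            assert num_iter == 0
        except Exception as error:
            os.remove(file_name)  # clean up on failure...
            raise error           # ...then re-raise so pytest still reports it
        else:
            os.remove(file_name)  # clean up on success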

File diff suppressed because it is too large

View File

@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert num_iter == 0
os.remove('dummy.mindrecord')
try:
assert num_iter == 0
except Exception as error:
os.remove('dummy.mindrecord')
raise error
else:
os.remove('dummy.mindrecord')
def test_minddataset_lack_db():
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert num_iter == 0
os.remove(CV_FILE_NAME)
try:
assert num_iter == 0
except Exception as error:
os.remove(CV_FILE_NAME)
raise error
else:
os.remove(CV_FILE_NAME)
def test_cv_minddataset_pk_sample_error_class_column():
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
try:
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
def test_minddataset_invalidate_shard_id():
create_cv_mindrecord(1)
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
try:
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
def test_minddataset_shard_id_bigger_than_num_shard():
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
try:
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
with pytest.raises(Exception) as error_info:
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
try:
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
def test_cv_minddataset_partition_num_samples_equals_0():
"""tutorial for cv minddataset."""
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
num_iter += 1
with pytest.raises(Exception) as error_info:
partitions(5)
assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
try:
assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
if __name__ == '__main__':
test_cv_lack_json()
test_cv_lack_mindrecord()
test_invalid_mindrecord()
test_minddataset_lack_db()
test_cv_minddataset_pk_sample_error_class_column()
test_cv_minddataset_pk_sample_exclusive_shuffle()
test_cv_minddataset_reader_different_schema()
test_cv_minddataset_reader_different_page_size()
test_minddataset_invalidate_num_shards()
test_minddataset_invalidate_shard_id()
test_minddataset_shard_id_bigger_than_num_shard()
test_cv_minddataset_partition_num_samples_equals_0()
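Per the message above ("change finally to except else"), an earlier revision of this change used finally instead. For comparison, a sketch of that equivalent try/finally form, again with a hypothetical file name; it runs the same cleanup on both paths without duplicating the remove calls, and is not what this commit ships:

    import os

    def test_with_cleanup_finally():
        file_name = "dummy_artifact.mindrecord"  # hypothetical name for illustration
        open(file_name, "w").close()             # stand-in for the writer under test
        num_iter = 0
        try:
            assert num_iter == 0  # pytest still sees the failure after cleanup runs
        finally:
            if os.path.exists(file_name):
                os.remove(file_name)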

View File

@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"
def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
cv_schema_json = {"id": {"type": "int32"},
"image_0": {"type": "bytes"},
"image_2": {"type": "bytes"},
"image_3": {"type": "bytes"},
"image_4": {"type": "bytes"},
"input_mask": {"type": "int32", "shape": [-1]},
"segments": {"type": "float32", "shape": [2, 3]}}
writer.add_schema(cv_schema_json, "two_images_schema")
with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
img_data = file_reader.read()
ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
data = []
for i in range(5):
item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
"input_mask": ndarray_1, "segments": ndarray_2}
data.append(item)
writer.write_raw_data(data)
writer.commit()
assert os.path.exists(CV_FILE_NAME)
assert os.path.exists(CV_FILE_NAME + ".db")
try:
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
cv_schema_json = {"id": {"type": "int32"},
"image_0": {"type": "bytes"},
"image_2": {"type": "bytes"},
"image_3": {"type": "bytes"},
"image_4": {"type": "bytes"},
"input_mask": {"type": "int32", "shape": [-1]},
"segments": {"type": "float32", "shape": [2, 3]}}
writer.add_schema(cv_schema_json, "two_images_schema")
with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
img_data = file_reader.read()
ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
data = []
for i in range(5):
item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
"input_mask": ndarray_1, "segments": ndarray_2}
data.append(item)
writer.write_raw_data(data)
writer.commit()
assert os.path.exists(CV_FILE_NAME)
assert os.path.exists(CV_FILE_NAME + ".db")
# tutorial for minddataset.
columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
num_readers = 1
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
assert data_set.get_dataset_size() == 5
num_iter = 0
for item in data_set.create_dict_iterator():
assert len(item) == 7
logger.info("item: {}".format(item))
assert item["image_0"].dtype == np.uint8
assert (item["image_0"] == item["image_2"]).all()
assert (item["image_3"] == item["image_4"]).all()
assert (item["image_0"] == item["image_4"]).all()
assert item["image_2"].dtype == np.uint8
assert item["image_3"].dtype == np.uint8
assert item["image_4"].dtype == np.uint8
assert item["id"].dtype == np.int32
assert item["input_mask"].shape == (5,)
assert item["input_mask"].dtype == np.int32
assert item["segments"].shape == (2, 3)
assert item["segments"].dtype == np.float32
num_iter += 1
assert num_iter == 5
# tutorial for minddataset.
columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
num_readers = 1
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
assert data_set.get_dataset_size() == 5
num_iter = 0
for item in data_set.create_dict_iterator():
assert len(item) == 7
logger.info("item: {}".format(item))
assert item["image_0"].dtype == np.uint8
assert (item["image_0"] == item["image_2"]).all()
assert (item["image_3"] == item["image_4"]).all()
assert (item["image_0"] == item["image_4"]).all()
assert item["image_2"].dtype == np.uint8
assert item["image_3"].dtype == np.uint8
assert item["image_4"].dtype == np.uint8
assert item["id"].dtype == np.int32
assert item["input_mask"].shape == (5,)
assert item["input_mask"].dtype == np.int32
assert item["segments"].shape == (2, 3)
assert item["segments"].dtype == np.float32
num_iter += 1
assert num_iter == 5
except Exception as error:
if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
os.remove(CV_FILE_NAME + ".db")
if os.path.exists("{}".format(CV_FILE_NAME)):
os.remove(CV_FILE_NAME)
raise error
else:
if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
os.remove(CV_FILE_NAME + ".db")
if os.path.exists("{}".format(CV_FILE_NAME)):
os.remove(CV_FILE_NAME)
if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
os.remove(CV_FILE_NAME + ".db")
if os.path.exists("{}".format(CV_FILE_NAME)):
os.remove(CV_FILE_NAME)
if __name__ == '__main__':
test_cv_minddataset_reader_multi_image_and_ndarray_tutorial()
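This file's cleanup additionally guards every remove with os.path.exists, presumably because a failure partway through writing can leave only one of the two artifacts (the .mindrecord file and its .db index) on disk. The repeated guard could be captured in a small helper; the name below is invented for illustration:

    import os

    def remove_if_exists(path):
        # invented helper mirroring the repeated exists-then-remove guard
        if os.path.exists(path):
            os.remove(path)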

View File

@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
try:
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
except Exception as error:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
raise error
else:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
@pytest.fixture
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
"""add/remove nlp file"""
paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
if os.path.exists("{}".format(x)):
try:
for x in paths:
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
"rating": {"type": "float32"},
"input_ids": {"type": "int64",
"shape": [-1]},
"input_mask": {"type": "int64",
"shape": [1, -1]},
"segment_ids": {"type": "int64",
"shape": [2, -1]}
}
writer.set_header_size(1 << 14)
writer.set_page_size(1 << 15)
writer.add_schema(nlp_schema_json, "nlp_schema")
writer.add_index(["id", "rating"])
writer.write_raw_data(data)
writer.commit()
yield "yield_nlp_data"
except Exception as error:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
raise error
else:
for x in paths:
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
"rating": {"type": "float32"},
"input_ids": {"type": "int64",
"shape": [-1]},
"input_mask": {"type": "int64",
"shape": [1, -1]},
"segment_ids": {"type": "int64",
"shape": [2, -1]}
}
writer.set_header_size(1 << 14)
writer.set_page_size(1 << 15)
writer.add_schema(nlp_schema_json, "nlp_schema")
writer.add_index(["id", "rating"])
writer.write_raw_data(data)
writer.commit()
yield "yield_nlp_data"
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
encoding='utf8')
assert item['label'] == padded_sample['label']
assert (item['data'] == np.array(list(padded_sample['data']))).all()
num_iter += 1
num_iter += 1
assert num_padded_iter == 5
assert num_iter == 15
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
mask = [1] * length + [0] * (maxlen - length)
segment = [0] * maxlen
return input_, mask, segment
if __name__ == '__main__':
test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file)
test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file)
test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file)
test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file)
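The padded-samples and sampler modules apply the same guard inside their pytest generator fixtures. Note that pytest does not throw test failures into a yield fixture: the except branch here covers errors during setup (before the yield), while the else branch runs at teardown, after the test body completes, whether the test passed or failed. A self-contained sketch of that fixture shape, with one hypothetical file standing in for the MindRecord shards:

    import os
    import pytest

    FILE_NAME = "./example_artifact.txt"  # hypothetical stand-in for the shard files

    @pytest.fixture
    def add_and_remove_file():
        try:
            if os.path.exists(FILE_NAME):
                os.remove(FILE_NAME)         # clear leftovers from earlier runs
            with open(FILE_NAME, "w") as f:  # setup: stand-in for FileWriter
                f.write("data")
            yield FILE_NAME                  # the test body runs while suspended here
        except Exception as error:
            if os.path.exists(FILE_NAME):
                os.remove(FILE_NAME)         # clean up if setup itself fails
            raise error
        else:
            if os.path.exists(FILE_NAME):
                os.remove(FILE_NAME)         # teardown: resumes here after the test

    def test_file_exists(add_and_remove_file):
        assert os.path.exists(add_and_remove_file)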

View File

@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
if os.path.exists("{}".format(x)):
try:
for x in paths:
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME, True)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
except Exception as error:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
raise error
else:
for x in paths:
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME, True)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
except FileNotFoundError:
continue
return data_list
if __name__ == '__main__':
test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file)
test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file)
test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file)
test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file)
test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file)
test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file)
test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file)
test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file)
test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file)
test_cv_minddataset_split_basic(add_and_remove_cv_file)
test_cv_minddataset_split_exact_percent(add_and_remove_cv_file)
test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file)
test_cv_minddataset_split_deterministic(add_and_remove_cv_file)
test_cv_minddataset_split_sharding(add_and_remove_cv_file)