add try to minddataset python tests to remove files on fail

delete unrelated file

change finally to except else

fix spacing alignment

fix indentation

fix indentation

fix indentation

add fix to new test case

add if main for pytest

fix spacing
tony_liu2 2020-07-17 14:06:14 -04:00
parent 859fe6bc41
commit 1f4251a440
5 changed files with 1306 additions and 1083 deletions
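Every hunk below applies the same pattern: an assertion that previously ran before an unconditional os.remove is wrapped in try/except/else, so the generated MindRecord artifacts are deleted whether the assertion passes or fails. A minimal sketch of that pattern, using a hypothetical file name rather than any path from these tests:

    import os

    def test_with_cleanup():
        file_name = "dummy_artifact.mindrecord"  # hypothetical name for illustration
        open(file_name, "w").close()             # stand-in for the writer under test
        num_iter = 0
        try:
            assert num_iter == 0
        except Exception as error:
            os.remove(file_name)  # clean up on failure...
            raise error           # ...then re-raise so pytest still reports it
        else:
            os.remove(file_name)  # clean up on success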

File diff suppressed because it is too large

View File

@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert num_iter == 0
os.remove('dummy.mindrecord')
try:
assert num_iter == 0
except Exception as error:
os.remove('dummy.mindrecord')
raise error
else:
os.remove('dummy.mindrecord')
def test_minddataset_lack_db():
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert num_iter == 0
os.remove(CV_FILE_NAME)
try:
assert num_iter == 0
except Exception as error:
os.remove(CV_FILE_NAME)
raise error
else:
os.remove(CV_FILE_NAME)
def test_cv_minddataset_pk_sample_error_class_column():
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
try:
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
def test_minddataset_invalidate_shard_id():
create_cv_mindrecord(1)
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
try:
assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
def test_minddataset_shard_id_bigger_than_num_shard():
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
try:
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
with pytest.raises(Exception) as error_info:
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
num_iter = 0
for _ in data_set.create_dict_iterator():
num_iter += 1
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
try:
assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
def test_cv_minddataset_partition_num_samples_equals_0():
"""tutorial for cv minddataset."""
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
num_iter += 1
with pytest.raises(Exception) as error_info:
partitions(5)
assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
try:
assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
raise error
else:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))
if __name__ == '__main__':
test_cv_lack_json()
test_cv_lack_mindrecord()
test_invalid_mindrecord()
test_minddataset_lack_db()
test_cv_minddataset_pk_sample_error_class_column()
test_cv_minddataset_pk_sample_exclusive_shuffle()
test_cv_minddataset_reader_different_schema()
test_cv_minddataset_reader_different_page_size()
test_minddataset_invalidate_num_shards()
test_minddataset_invalidate_shard_id()
test_minddataset_shard_id_bigger_than_num_shard()
test_cv_minddataset_partition_num_samples_equals_0()
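Per the message above ("change finally to except else"), an earlier revision of this change used finally instead. For comparison, a sketch of that equivalent try/finally form, again with a hypothetical file name; it runs the same cleanup on both paths without duplicating the remove calls, and is not what this commit ships:

    import os

    def test_with_cleanup_finally():
        file_name = "dummy_artifact.mindrecord"  # hypothetical name for illustration
        open(file_name, "w").close()             # stand-in for the writer under test
        num_iter = 0
        try:
            assert num_iter == 0  # pytest still sees the failure after cleanup runs
        finally:
            if os.path.exists(file_name):
                os.remove(file_name)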

View File

@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"
def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
cv_schema_json = {"id": {"type": "int32"},
"image_0": {"type": "bytes"},
"image_2": {"type": "bytes"},
"image_3": {"type": "bytes"},
"image_4": {"type": "bytes"},
"input_mask": {"type": "int32", "shape": [-1]},
"segments": {"type": "float32", "shape": [2, 3]}}
writer.add_schema(cv_schema_json, "two_images_schema")
with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
img_data = file_reader.read()
ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
data = []
for i in range(5):
item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
"input_mask": ndarray_1, "segments": ndarray_2}
data.append(item)
writer.write_raw_data(data)
writer.commit()
assert os.path.exists(CV_FILE_NAME)
assert os.path.exists(CV_FILE_NAME + ".db")
try:
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
cv_schema_json = {"id": {"type": "int32"},
"image_0": {"type": "bytes"},
"image_2": {"type": "bytes"},
"image_3": {"type": "bytes"},
"image_4": {"type": "bytes"},
"input_mask": {"type": "int32", "shape": [-1]},
"segments": {"type": "float32", "shape": [2, 3]}}
writer.add_schema(cv_schema_json, "two_images_schema")
with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
img_data = file_reader.read()
ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
data = []
for i in range(5):
item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
"input_mask": ndarray_1, "segments": ndarray_2}
data.append(item)
writer.write_raw_data(data)
writer.commit()
assert os.path.exists(CV_FILE_NAME)
assert os.path.exists(CV_FILE_NAME + ".db")
# tutorial for minddataset.
columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
num_readers = 1
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
assert data_set.get_dataset_size() == 5
num_iter = 0
for item in data_set.create_dict_iterator():
assert len(item) == 7
logger.info("item: {}".format(item))
assert item["image_0"].dtype == np.uint8
assert (item["image_0"] == item["image_2"]).all()
assert (item["image_3"] == item["image_4"]).all()
assert (item["image_0"] == item["image_4"]).all()
assert item["image_2"].dtype == np.uint8
assert item["image_3"].dtype == np.uint8
assert item["image_4"].dtype == np.uint8
assert item["id"].dtype == np.int32
assert item["input_mask"].shape == (5,)
assert item["input_mask"].dtype == np.int32
assert item["segments"].shape == (2, 3)
assert item["segments"].dtype == np.float32
num_iter += 1
assert num_iter == 5
# tutorial for minddataset.
columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
num_readers = 1
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
assert data_set.get_dataset_size() == 5
num_iter = 0
for item in data_set.create_dict_iterator():
assert len(item) == 7
logger.info("item: {}".format(item))
assert item["image_0"].dtype == np.uint8
assert (item["image_0"] == item["image_2"]).all()
assert (item["image_3"] == item["image_4"]).all()
assert (item["image_0"] == item["image_4"]).all()
assert item["image_2"].dtype == np.uint8
assert item["image_3"].dtype == np.uint8
assert item["image_4"].dtype == np.uint8
assert item["id"].dtype == np.int32
assert item["input_mask"].shape == (5,)
assert item["input_mask"].dtype == np.int32
assert item["segments"].shape == (2, 3)
assert item["segments"].dtype == np.float32
num_iter += 1
assert num_iter == 5
except Exception as error:
if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
os.remove(CV_FILE_NAME + ".db")
if os.path.exists("{}".format(CV_FILE_NAME)):
os.remove(CV_FILE_NAME)
raise error
else:
if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
os.remove(CV_FILE_NAME + ".db")
if os.path.exists("{}".format(CV_FILE_NAME)):
os.remove(CV_FILE_NAME)
if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
os.remove(CV_FILE_NAME + ".db")
if os.path.exists("{}".format(CV_FILE_NAME)):
os.remove(CV_FILE_NAME)
if __name__ == '__main__':
test_cv_minddataset_reader_multi_image_and_ndarray_tutorial()
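This file's cleanup additionally guards every remove with os.path.exists, presumably because a failure partway through writing can leave only one of the two artifacts (the .mindrecord file and its .db index) on disk. The repeated guard could be captured in a small helper; the name below is invented for illustration:

    import os

    def remove_if_exists(path):
        # invented helper mirroring the repeated exists-then-remove guard
        if os.path.exists(path):
            os.remove(path)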

View File

@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
try:
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
except Exception as error:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
raise error
else:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
@pytest.fixture
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
"""add/remove nlp file"""
paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
if os.path.exists("{}".format(x)):
try:
for x in paths:
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
"rating": {"type": "float32"},
"input_ids": {"type": "int64",
"shape": [-1]},
"input_mask": {"type": "int64",
"shape": [1, -1]},
"segment_ids": {"type": "int64",
"shape": [2, -1]}
}
writer.set_header_size(1 << 14)
writer.set_page_size(1 << 15)
writer.add_schema(nlp_schema_json, "nlp_schema")
writer.add_index(["id", "rating"])
writer.write_raw_data(data)
writer.commit()
yield "yield_nlp_data"
except Exception as error:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
raise error
else:
for x in paths:
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
"rating": {"type": "float32"},
"input_ids": {"type": "int64",
"shape": [-1]},
"input_mask": {"type": "int64",
"shape": [1, -1]},
"segment_ids": {"type": "int64",
"shape": [2, -1]}
}
writer.set_header_size(1 << 14)
writer.set_page_size(1 << 15)
writer.add_schema(nlp_schema_json, "nlp_schema")
writer.add_index(["id", "rating"])
writer.write_raw_data(data)
writer.commit()
yield "yield_nlp_data"
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
encoding='utf8')
assert item['label'] == padded_sample['label']
assert (item['data'] == np.array(list(padded_sample['data']))).all()
num_iter += 1
num_iter += 1
assert num_padded_iter == 5
assert num_iter == 15
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
mask = [1] * length + [0] * (maxlen - length)
segment = [0] * maxlen
return input_, mask, segment
if __name__ == '__main__':
test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file)
test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file)
test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file)
test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file)
test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file)
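The padded-samples and sampler modules apply the same guard inside their pytest generator fixtures. Note that pytest does not throw test failures into a yield fixture: the except branch here covers errors during setup (before the yield), while the else branch runs at teardown, after the test body completes, whether the test passed or failed. A self-contained sketch of that fixture shape, with one hypothetical file standing in for the MindRecord shards:

    import os
    import pytest

    FILE_NAME = "./example_artifact.txt"  # hypothetical stand-in for the shard files

    @pytest.fixture
    def add_and_remove_file():
        try:
            if os.path.exists(FILE_NAME):
                os.remove(FILE_NAME)         # clear leftovers from earlier runs
            with open(FILE_NAME, "w") as f:  # setup: stand-in for FileWriter
                f.write("data")
            yield FILE_NAME                  # the test body runs while suspended here
        except Exception as error:
            if os.path.exists(FILE_NAME):
                os.remove(FILE_NAME)         # clean up if setup itself fails
            raise error
        else:
            if os.path.exists(FILE_NAME):
                os.remove(FILE_NAME)         # teardown: resumes here after the test

    def test_file_exists(add_and_remove_file):
        assert os.path.exists(add_and_remove_file)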

View File

@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
if os.path.exists("{}".format(x)):
try:
for x in paths:
if os.path.exists("{}".format(x)):
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME, True)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
except Exception as error:
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
raise error
else:
for x in paths:
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
data = get_data(CV_DIR_NAME, True)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
"label": {"type": "int32"},
"data": {"type": "bytes"}}
writer.add_schema(cv_schema_json, "img_schema")
writer.add_index(["file_name", "label"])
writer.write_raw_data(data)
writer.commit()
yield "yield_cv_data"
for x in paths:
os.remove("{}".format(x))
os.remove("{}.db".format(x))
def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
except FileNotFoundError:
continue
return data_list
if __name__ == '__main__':
test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file)
test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file)
test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file)
test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file)
test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file)
test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file)
test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file)
test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file)
test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file)
test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file)
test_cv_minddataset_split_basic(add_and_remove_cv_file)
test_cv_minddataset_split_exact_percent(add_and_remove_cv_file)
test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file)
test_cv_minddataset_split_deterministic(add_and_remove_cv_file)
test_cv_minddataset_split_sharding(add_and_remove_cv_file)