refactor md ut

This commit is contained in:
liyong 2021-10-18 17:55:28 +08:00
parent 9d0d094437
commit d9aa18dfca
6 changed files with 478 additions and 330 deletions

View File

@ -31,20 +31,15 @@ from mindspore.dataset.vision import Inter
from mindspore.mindrecord import FileWriter
FILES_NUM = 4
CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord"
CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord"
CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord"
CV_DIR_NAME = "../data/mindrecord/testImageNetData"
NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord"
OLD_NLP_FILE_NAME = "../data/mindrecord/testOldVersion/aclImdb.mindrecord"
NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos"
NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt"
@pytest.fixture
def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -52,7 +47,7 @@ def add_and_remove_cv_file():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
@ -77,7 +72,8 @@ def add_and_remove_cv_file():
@pytest.fixture
def add_and_remove_nlp_file():
"""add/remove nlp file"""
paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -85,7 +81,7 @@ def add_and_remove_nlp_file():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
"rating": {"type": "float32"},
@ -117,7 +113,8 @@ def add_and_remove_nlp_file():
@pytest.fixture
def add_and_remove_nlp_compress_file():
"""add/remove nlp file"""
paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -125,7 +122,7 @@ def add_and_remove_nlp_compress_file():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = []
for row_id in range(16):
data.append({
@ -183,8 +180,9 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
"array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1])
})
num_readers = 1
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(
NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
file_name + "0", None, num_readers, shuffle=False)
assert data_set.get_dataset_size() == 16
num_iter = 0
for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)):
@ -197,29 +195,10 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
assert num_iter == 16
def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file):
    """Compare the dataset at NLP_FILE_NAME with the one at OLD_NLP_FILE_NAME, row by row.

    Both datasets are opened unshuffled with a single reader. The old-version
    dataset must report 16 rows, and every row pair must agree on the four
    array columns and on the label.
    """
    reader_count = 1
    data_set = ds.MindDataset(
        NLP_FILE_NAME + "0", None, reader_count, shuffle=False)
    old_data_set = ds.MindDataset(
        OLD_NLP_FILE_NAME + "0", None, reader_count, shuffle=False)
    assert old_data_set.get_dataset_size() == 16

    # The old-version iterator is created first, then the current one.
    old_rows = old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True)
    new_rows = data_set.create_dict_iterator(num_epochs=1, output_numpy=True)

    num_iter = 0
    for expected, actual in zip(old_rows, new_rows):
        for column in ("array_a", "array_b", "array_c", "array_d"):
            assert (actual[column] == expected[column]).all()
        assert actual["label"] == expected["label"]
        num_iter += 1
    # zip stops at the shorter iterator, so this also checks that both sides yielded 16 rows.
    assert num_iter == 16
def test_cv_minddataset_writer_tutorial():
"""tutorial for cv dataset writer."""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -227,7 +206,7 @@ def test_cv_minddataset_writer_tutorial():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"},
"data": {"type": "bytes"}}
@ -250,10 +229,11 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards, shard_id=partition_id)
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@ -272,10 +252,11 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id, num_samples=1)
@ -297,10 +278,11 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id, num_samples=2)
@ -322,10 +304,11 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, expect):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id, num_samples=3)
@ -346,8 +329,9 @@ def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5)
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5)
assert data_set.get_dataset_size() == 5
num_iter = 0
@ -366,9 +350,10 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c
epoch1 = []
epoch2 = []
epoch3 = []
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards, shard_id=partition_id)
data_set = data_set.repeat(3)
@ -401,13 +386,14 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
num_shards = 3
epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result
[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # save partition 2 result
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards, shard_id=partition_id)
data_set = data_set.repeat(3)
@ -436,13 +422,14 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
ds.config.set_seed(54321)
epoch1 = []
epoch2 = []
epoch3 = []
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
data_set = data_set.repeat(3)
num_iter = 0
@ -468,7 +455,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
epoch2_new_dataset = []
epoch3_new_dataset = []
data_set2 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
data_set2 = ds.MindDataset(file_name + "0", columns_list, num_readers)
data_set2 = data_set2.repeat(3)
num_iter = 0
@ -499,7 +486,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
epoch2_new_dataset2 = []
epoch3_new_dataset2 = []
data_set3 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
data_set3 = ds.MindDataset(file_name + "0", columns_list, num_readers)
data_set3 = data_set3.repeat(3)
num_iter = 0
@ -530,7 +517,8 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
assert data_set.get_dataset_size() == 10
repeat_num = 2
data_set = data_set.repeat(repeat_num)
@ -544,7 +532,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
"-------------- item[data]: {} ----------------------".format(item["data"]))
num_iter += 1
assert num_iter == 20
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=4, shard_id=3)
assert data_set.get_dataset_size() == 3
@ -553,7 +541,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
decode_op = vision.Decode()
data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2)
@ -584,7 +573,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
"""tutorial for cv minddataset."""
columns_list = ["data", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
decode_op = vision.Decode()
data_set = data_set.map(
input_columns=["data"], operations=decode_op, num_parallel_workers=2)
@ -608,7 +598,8 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file):
"""issue 888 test."""
columns_list = ["data", "label"]
num_readers = 2
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1)
data_set = data_set.shuffle(2)
data_set = data_set.repeat(9)
num_iter = 0
@ -621,7 +612,8 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset([CV_FILE_NAME + str(x)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset([file_name + str(x)
for x in range(FILES_NUM)], columns_list, num_readers)
assert data_set.get_dataset_size() == 10
num_iter = 0
@ -644,7 +636,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset([file_name + "0"], columns_list, num_readers)
assert data_set.get_dataset_size() < 10
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@ -664,6 +657,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord"
CV2_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord"
try:
if os.path.exists(CV1_FILE_NAME):
os.remove(CV1_FILE_NAME)
@ -696,7 +691,8 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
writer.commit()
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME],
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME],
columns_list, num_readers)
assert data_set.get_dataset_size() == 30
num_iter = 0
@ -735,6 +731,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1"
paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
@ -756,7 +753,8 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(2)] +
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset([file_name + str(x) for x in range(2)] +
[CV1_FILE_NAME + str(x) for x in range(2, 4)],
columns_list, num_readers)
assert data_set.get_dataset_size() < 20
@ -789,7 +787,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
assert data_set.get_dataset_size() == 10
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@ -810,7 +809,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
"""tutorial for nlp minderdataset."""
num_readers = 4
data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", None, num_readers)
assert data_set.get_dataset_size() == 10
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@ -839,7 +839,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
assert data_set.get_dataset_size() == 10
for _ in range(5):
num_iter = 0
@ -855,7 +856,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_
"""tutorial for cv minderdataset."""
columns_list = ["data", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
resize_height = 32
resize_width = 32
@ -881,7 +883,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_
def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
data_set = ds.MindDataset(CV_FILE_NAME + "0")
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0")
assert data_set.get_dataset_size() == 10
num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@ -903,7 +906,8 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
repeat_num = 2
data_set = data_set.repeat(repeat_num)
num_iter = 0
@ -1753,7 +1757,8 @@ def test_write_with_multi_array_and_MindDataset():
def test_numpy_generic():
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -1761,7 +1766,7 @@ def test_numpy_generic():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
cv_schema_json = {"label1": {"type": "int32"}, "label2": {"type": "int64"},
"label3": {"type": "float32"}, "label4": {"type": "float64"}}
data = []
@ -1777,7 +1782,7 @@ def test_numpy_generic():
writer.commit()
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False)
data_set = ds.MindDataset(file_name + "0", None, num_readers, shuffle=False)
assert data_set.get_dataset_size() == 10
idx = 0
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@ -1799,7 +1804,7 @@ def test_numpy_generic():
def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset():
mindrecord_file_name = "test.mindrecord"
mindrecord_file_name = "test_write_with_float32_float64_float32_array_float64_array_and_MindDataset.mindrecord"
try:
data = [{"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32),
"float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471,
@ -2570,7 +2575,8 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files):
def test_field_is_null_numpy():
"""add/remove nlp file"""
paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
for x in paths:
if os.path.exists("{}".format(x)):
@ -2578,7 +2584,7 @@ def test_field_is_null_numpy():
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = []
# field array_d is null
for row_id in range(16):
@ -2607,7 +2613,7 @@ def test_field_is_null_numpy():
writer.write_raw_data(data)
writer.commit()
data_set = ds.MindDataset(dataset_file=NLP_FILE_NAME + "0",
data_set = ds.MindDataset(dataset_file=file_name + "0",
columns_list=["label", "array_a", "array_b", "array_d"],
num_parallel_workers=2,
shuffle=False)
@ -2639,8 +2645,9 @@ def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file):
"array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1])
})
num_readers = 1
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(
NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
file_name + "0", None, num_readers, shuffle=False)
assert data_set.get_dataset_size() == 16
# create_dict_iterator in for loop

View File

@ -28,26 +28,22 @@ from mindspore import log as logger
from mindspore.mindrecord import FileWriter
FILES_NUM = 4
CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord"
CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord"
CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord"
CV_DIR_NAME = "../data/mindrecord/testImageNetData"
NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord"
NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos"
NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt"
@pytest.fixture
def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
os.remove("{}.db".format(x)) if os.path.exists(
"{}.db".format(x)) else None
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = get_data(CV_DIR_NAME)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
@ -72,7 +68,8 @@ def add_and_remove_cv_file():
@pytest.fixture
def add_and_remove_nlp_file():
"""add/remove nlp file"""
paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -80,7 +77,7 @@ def add_and_remove_nlp_file():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
"rating": {"type": "float32"},
@ -118,7 +115,8 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
padded_sample['label'] = -1
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
assert data_set.get_dataset_size() == 15
num_iter = 0
num_padded_iter = 0
@ -145,7 +143,8 @@ def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_
padded_sample['label'] = -1
padded_sample['file_name'] = 99999
num_readers = 4
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
assert data_set.get_dataset_size() == 15
num_iter = 0
num_padded_iter = 0
@ -173,12 +172,13 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file):
padded_sample['label'] = -2
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded, dataset_size):
num_padded_iter = 0
num_iter = 0
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -213,6 +213,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f
padded_sample['label'] = -2
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded, dataset_size):
repeat_size = 5
@ -224,7 +225,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f
epoch3_shuffle_result = []
epoch4_shuffle_result = []
epoch5_shuffle_result = []
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -285,10 +286,11 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv
padded_sample['label'] = -2
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -310,10 +312,11 @@ def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_a
padded_sample['label'] = -2
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -332,10 +335,11 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re
padded_sample.pop('label', None)
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -356,10 +360,11 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c
padded_sample['label'] = -2
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
data_set = ds.MindDataset(file_name + "0", None, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -380,10 +385,11 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv
padded_sample = data[0]
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
data_set = ds.MindDataset(file_name + "0", None, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample)
@ -403,10 +409,11 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov
padded_sample = data[0]
padded_sample['file_name'] = 'dummy.jpg'
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded):
for partition_id in range(num_shards):
data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
data_set = ds.MindDataset(file_name + "0", None, num_readers,
num_shards=num_shards,
shard_id=partition_id,
num_padded=num_padded)
@ -429,12 +436,13 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file):
padded_sample['input_ids'] = np.array([-1, -1, -1, -1], dtype=np.int64)
padded_sample['rating'] = 1.0
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded, dataset_size):
num_padded_iter = 0
num_iter = 0
for partition_id in range(num_shards):
data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -470,6 +478,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_
padded_sample['rating'] = 1.0
num_readers = 4
repeat_size = 3
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded, dataset_size):
num_padded_iter = 0
@ -479,7 +488,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_
epoch1_shuffle_result = []
epoch2_shuffle_result = []
epoch3_shuffle_result = []
data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,
@ -534,6 +543,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul
padded_sample['rating'] = 1.0
num_readers = 4
repeat_size = 3
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
def partitions(num_shards, num_padded, dataset_size):
num_padded_iter = 0
@ -542,7 +552,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul
epoch_result = [[["" for i in range(dataset_size)] for i in range(repeat_size)] for i in range(num_shards)]
for partition_id in range(num_shards):
data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
num_shards=num_shards,
shard_id=partition_id,
padded_sample=padded_sample,

View File

@ -25,14 +25,13 @@ from mindspore.dataset.text import to_str
from mindspore.mindrecord import FileWriter
FILES_NUM = 4
CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord"
CV_DIR_NAME = "../data/mindrecord/testImageNetData"
@pytest.fixture
def add_and_remove_cv_file():
"""add/remove cv file"""
paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
try:
for x in paths:
@ -40,7 +39,7 @@ def add_and_remove_cv_file():
os.remove("{}".format(x))
if os.path.exists("{}.db".format(x)):
os.remove("{}.db".format(x))
writer = FileWriter(CV_FILE_NAME, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
data = get_data(CV_DIR_NAME, True)
cv_schema_json = {"id": {"type": "int32"},
"file_name": {"type": "string"},
@ -66,7 +65,8 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
num_readers = 4
sampler = ds.PKSampler(2)
data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", None, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 6
@ -86,7 +86,8 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(2)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 6
@ -108,7 +109,8 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(3, None, True)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 9
@ -129,7 +131,8 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(3, None, True, 'label', 5)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 5
@ -150,7 +153,8 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(3, None, True, 'label', 10)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 9
@ -171,7 +175,8 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(5, None, True)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 15
num_iter = 0
@ -191,7 +196,8 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(5, None, True, 'label', 20)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 15
num_iter = 0
@ -211,7 +217,8 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(5, None, True, 'label', 10)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 10
num_iter = 0
@ -230,10 +237,11 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
indices = [1, 2, 3, 5, 7]
samplers = (ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices))
for sampler in samplers:
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 5
num_iter = 0
@ -255,9 +263,10 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
indices = [1, 2, 2, 5, 7, 9]
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
for sampler in samplers:
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 6
num_iter = 0
@ -279,9 +288,10 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
indices = []
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
for sampler in samplers:
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 0
num_iter = 0
@ -304,8 +314,9 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file
num_readers = 4
indices = [1, 2, 4, 11, 13]
samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
for sampler in samplers:
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 5
num_iter = 0
@ -327,8 +338,9 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
num_readers = 4
indices = [1, 2, 4, -1, -2]
samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
for sampler in samplers:
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 5
num_iter = 0
@ -350,7 +362,8 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.RandomSampler()
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 10
num_iter = 0
@ -373,8 +386,9 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.RandomSampler()
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 10
ds1 = data_set.repeat(3)
@ -407,8 +421,9 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.RandomSampler(replacement=True, num_samples=5)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 5
num_iter = 0
@ -428,8 +443,9 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.RandomSampler(replacement=False, num_samples=2)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 2
num_iter = 0
@ -449,8 +465,9 @@ def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_fil
def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.RandomSampler(replacement=False, num_samples=20)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 10
num_iter = 0
@ -471,8 +488,9 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.SequentialSampler(1, 4)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
assert data_set.get_dataset_size() == 4
num_iter = 0
@ -495,8 +513,9 @@ def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.SequentialSampler(2, 10)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
dataset_size = data_set.get_dataset_size()
assert dataset_size == 10
@ -520,8 +539,9 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
sampler = ds.SequentialSampler(2, 20)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
sampler=sampler)
dataset_size = data_set.get_dataset_size()
assert dataset_size == 10
@ -545,7 +565,8 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
d = ds.MindDataset(file_name + "0", columns_list,
num_readers, shuffle=False)
d1, d2 = d.split([8, 2], randomize=False)
assert d.get_dataset_size() == 10
@ -581,7 +602,8 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
d = ds.MindDataset(file_name + "0", columns_list,
num_readers, shuffle=False)
d1, d2 = d.split([0.8, 0.2], randomize=False)
assert d.get_dataset_size() == 10
@ -617,7 +639,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
d = ds.MindDataset(file_name + "0", columns_list,
num_readers, shuffle=False)
d1, d2 = d.split([0.41, 0.59], randomize=False)
assert d.get_dataset_size() == 10
@ -652,7 +675,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
def test_cv_minddataset_split_deterministic(add_and_remove_cv_file):
columns_list = ["data", "file_name", "label"]
num_readers = 4
d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
d = ds.MindDataset(file_name + "0", columns_list,
num_readers, shuffle=False)
# should set seed to avoid data overlap
ds.config.set_seed(111)
@ -693,7 +717,8 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file):
data = get_data(CV_DIR_NAME, True)
columns_list = ["data", "file_name", "label"]
num_readers = 4
d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
d = ds.MindDataset(file_name + "0", columns_list,
num_readers, shuffle=False)
# should set seed to avoid data overlap
ds.config.set_seed(111)

View File

@ -23,38 +23,25 @@ import mindspore.dataset as ds
from mindspore import log as logger
from mindspore.mindrecord import FileWriter
TEMP_FILE = "../data/mindrecord/testMindDataSet/temp.mindrecord"
AUTO_FILE = "../data/mindrecord/testMindDataSet/auto.mindrecord"
TFRECORD_FILES = "../data/mindrecord/testTFRecordData/dummy.tfrecord"
FILES_NUM = 1
num_readers = 1
def remove_file(file_name):
    """Delete a generated file together with its matching ``.db`` file.

    Paths that do not exist are skipped, so the helper can be called
    unconditionally once a test is done with its output.

    Args:
        file_name (str): path of the generated file; ``file_name + ".db"``
            is removed as well.
    """
    for path in (file_name, "{}.db".format(file_name)):
        if os.path.exists(path):
            os.remove(path)
def test_case_00(add_remove_file): # only bin data
def test_case_00():
"""
Feature: save op
Description: all bin data
Expectation: generated mindrecord file
"""
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data = [{"image1": bytes("image1 bytes abc", encoding='UTF-8'),
"image2": bytes("image1 bytes def", encoding='UTF-8'),
"image3": bytes("image1 bytes ghi", encoding='UTF-8'),
@ -86,13 +73,16 @@ def test_case_00(add_remove_file): # only bin data
"image3": {"type": "bytes"},
"image4": {"type": "bytes"},
"image5": {"type": "bytes"}}
writer = FileWriter(TEMP_FILE, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
writer.add_schema(schema, "schema")
writer.write_raw_data(data)
writer.commit()
d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False)
d1.save(AUTO_FILE, FILES_NUM)
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
d1.save(file_name_auto, FILES_NUM)
data_value_to_list = []
for item in data:
@ -104,7 +94,7 @@ def test_case_00(add_remove_file): # only bin data
new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8)
data_value_to_list.append(new_data)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
assert d2.get_dataset_size() == 5
@ -119,9 +109,12 @@ def test_case_00(add_remove_file): # only bin data
assert item[field] == data_value_to_list[num_iter][field]
num_iter += 1
assert num_iter == 5
remove_file(file_name)
remove_file(file_name_auto)
def test_case_01(add_remove_file): # only raw data
file_name_auto = './'
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data = [{"file_name": "001.jpg", "label": 43},
{"file_name": "002.jpg", "label": 91},
{"file_name": "003.jpg", "label": 61},
@ -132,13 +125,16 @@ def test_case_01(add_remove_file): # only raw data
"label": {"type": "int32"}
}
writer = FileWriter(TEMP_FILE, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
writer.add_schema(schema, "schema")
writer.write_raw_data(data)
writer.commit()
d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False)
d1.save(AUTO_FILE, FILES_NUM)
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
d1.save(file_name_auto, FILES_NUM)
data_value_to_list = []
for item in data:
@ -147,7 +143,7 @@ def test_case_01(add_remove_file): # only raw data
new_data['label'] = np.asarray(list([item["label"]]), dtype=np.int32)
data_value_to_list.append(new_data)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
assert d2.get_dataset_size() == 6
@ -163,9 +159,17 @@ def test_case_01(add_remove_file): # only raw data
assert item[field] == data_value_to_list[num_iter][field]
num_iter += 1
assert num_iter == 6
remove_file(file_name)
remove_file(file_name_auto)
def test_case_02(add_remove_file): # muti-bytes
def test_case_02(): # muti-bytes
"""
Feature: save op
Description: multiple byte fields
Expectation: generated mindrecord file
"""
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data = [{"file_name": "001.jpg", "label": 43,
"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32),
"float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471,
@ -258,13 +262,16 @@ def test_case_02(add_remove_file): # muti-bytes
"label": {"type": "int32"},
"image4": {"type": "bytes"},
"image5": {"type": "bytes"}}
writer = FileWriter(TEMP_FILE, FILES_NUM)
writer = FileWriter(file_name, FILES_NUM)
writer.add_schema(schema, "schema")
writer.write_raw_data(data)
writer.commit()
d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False)
d1.save(AUTO_FILE, FILES_NUM)
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
d1.save(file_name_auto, FILES_NUM)
data_value_to_list = []
for item in data:
@ -284,7 +291,7 @@ def test_case_02(add_remove_file): # muti-bytes
new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8)
data_value_to_list.append(new_data)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
assert d2.get_dataset_size() == 6
@ -303,6 +310,8 @@ def test_case_02(add_remove_file): # muti-bytes
assert item[field] == data_value_to_list[num_iter][field]
num_iter += 1
assert num_iter == 6
remove_file(file_name)
remove_file(file_name_auto)
def generator_1d():
@ -310,14 +319,21 @@ def generator_1d():
yield (np.array([i]),)
def test_case_03(add_remove_file):
def test_case_03():
"""
Feature: save op
Description: 1D numpy array
Expectation: generated mindrecord file
"""
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
# apply dataset operations
d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
d1.save(AUTO_FILE)
d1.save(file_name_auto)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
@ -327,6 +343,7 @@ def test_case_03(add_remove_file):
golden = np.array([i])
np.testing.assert_array_equal(item["data"], golden)
i = i + 1
remove_file(file_name_auto)
def generator_with_type(t):
@ -335,6 +352,9 @@ def generator_with_type(t):
def type_tester(t):
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
logger.info("Test with Type {}".format(t.__name__))
# apply dataset operations
@ -344,9 +364,9 @@ def type_tester(t):
data1 = data1.repeat(3)
data1.save(AUTO_FILE)
data1.save(file_name_auto)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
@ -362,10 +382,7 @@ def type_tester(t):
i = 0
num_repeat += 1
assert num_repeat == 3
if os.path.exists("{}".format(AUTO_FILE)):
os.remove("{}".format(AUTO_FILE))
if os.path.exists("{}.db".format(AUTO_FILE)):
os.remove("{}.db".format(AUTO_FILE))
remove_file(file_name_auto)
def test_case_04():
@ -377,20 +394,31 @@ def test_case_04():
type_tester(t)
def test_case_05():
    """
    Feature: save op
    Description: call save with num_files set to 0 on a generator dataset
    Expectation: an exception reporting the allowed num_files range is raised
    """
    # PYTEST_CURRENT_TEST has the form "<file>::<test name> (<stage>)"; keep the test name only
    target = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    expected_error = "num_files should between 0 and 1000."
    dataset = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
    with pytest.raises(Exception, match=expected_error):
        dataset.save(target, 0)
def test_case_06():
    """
    Feature: save op
    Description: call save with "tfrecord" as the requested output format
    Expectation: an exception reporting the unsupported format is raised
    """
    # PYTEST_CURRENT_TEST has the form "<file>::<test name> (<stage>)"; keep the test name only
    target = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    expected_error = "tfrecord dataset format is not supported."
    dataset = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)
    with pytest.raises(Exception, match=expected_error):
        dataset.save(target, 1, "tfrecord")
def cast_name(key):
@ -405,16 +433,20 @@ def cast_name(key):
def test_case_07():
if os.path.exists("{}".format(AUTO_FILE)):
os.remove("{}".format(AUTO_FILE))
if os.path.exists("{}.db".format(AUTO_FILE)):
os.remove("{}.db".format(AUTO_FILE))
"""
Feature: save op
Description: save tfrecord files
Expectation: generated mindrecord file
"""
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
d1 = ds.TFRecordDataset(TFRECORD_FILES, shuffle=False)
tf_data = []
for x in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
tf_data.append(x)
d1.save(AUTO_FILE, FILES_NUM)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d1.save(file_name_auto, FILES_NUM)
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
mr_data = []
@ -429,11 +461,7 @@ def test_case_07():
assert v == mr_data[count][cast_name(k)]
count += 1
assert count == 10
if os.path.exists("{}".format(AUTO_FILE)):
os.remove("{}".format(AUTO_FILE))
if os.path.exists("{}.db".format(AUTO_FILE)):
os.remove("{}.db".format(AUTO_FILE))
remove_file(file_name_auto)
def generator_dynamic_1d():
@ -461,14 +489,21 @@ def generator_dynamic_2d_1():
yield (np.arange(10).reshape([5, 2]),)
def test_case_08(add_remove_file):
def test_case_08():
"""
Feature: save op
Description: save dynamic 1D numpy array
Expectation: generated mindrecord file
"""
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
# apply dataset operations
d1 = ds.GeneratorDataset(generator_dynamic_1d, ["data"], shuffle=False)
d1.save(AUTO_FILE)
d1.save(file_name_auto)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
@ -481,16 +516,23 @@ def test_case_08(add_remove_file):
golden = np.array(arr)
np.testing.assert_array_equal(item["data"], golden)
i = i + 1
remove_file(file_name_auto)
def test_case_09(add_remove_file):
def test_case_09():
"""
Feature: save op
Description: save dynamic 2D numpy array
Expectation: generated mindrecord file
"""
file_name_auto = './'
file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
file_name_auto += '_auto'
# apply dataset operations
d1 = ds.GeneratorDataset(generator_dynamic_2d_0, ["data"], shuffle=False)
d1.save(AUTO_FILE)
d1.save(file_name_auto)
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
d2 = ds.MindDataset(dataset_file=file_name_auto,
num_parallel_workers=num_readers,
shuffle=False)
@ -502,13 +544,23 @@ def test_case_09(add_remove_file):
golden = np.arange(10).reshape([2, 5])
np.testing.assert_array_equal(item["data"], golden)
i = i + 1
remove_file(file_name_auto)
def test_case_10():
    """
    Feature: save op
    Description: save 2D Tensor of different shape
    Expectation: Exception
    """
    # PYTEST_CURRENT_TEST has the form "<file>::<test name> (<stage>)"; the
    # test name gives every test its own output path.
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'

    # apply dataset operations
    d1 = ds.GeneratorDataset(generator_dynamic_2d_1, ["data"], shuffle=False)
    try:
        with pytest.raises(Exception, match=
                           "Error: besides dimension 0, other dimension shape is different from the previous's"):
            d1.save(file_name_auto)
    finally:
        # Clean up even when the expectation above fails, so that whatever
        # save() managed to write is not left behind.
        remove_file(file_name_auto)

View File

@ -22,7 +22,6 @@ import os
import pytest
import numpy as np
from test_minddataset_sampler import add_and_remove_cv_file, get_data, CV_DIR_NAME, CV_FILE_NAME
from util import config_get_set_num_parallel_workers, config_get_set_seed
import mindspore.common.dtype as mstype
@ -509,38 +508,6 @@ def delete_json_files():
except IOError:
logger.info("Error while deleting: {}".format(f))
# Test save load minddataset
def skip_test_minddataset(add_and_remove_cv_file=True):
"""tutorial for cv minderdataset."""
columns_list = ["data", "file_name", "label"]
num_readers = 4
indices = [1, 2, 3, 5, 7]
sampler = ds.SubsetRandomSampler(indices)
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
sampler=sampler)
# Serializing into python dictionary
ds1_dict = ds.serialize(data_set)
# Serializing into json object
ds1_json = json.dumps(ds1_dict, sort_keys=True)
# Reconstruct dataset pipeline from its serialized form
data_set = ds.deserialize(input_dict=ds1_dict)
ds2_dict = ds.serialize(data_set)
# Serializing into json object
ds2_json = json.dumps(ds2_dict, sort_keys=True)
assert ds1_json == ds2_json
_ = get_data(CV_DIR_NAME)
assert data_set.get_dataset_size() == 5
num_iter = 0
for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1
assert num_iter == 5
if __name__ == '__main__':
test_serdes_imagefolder_dataset()
test_serdes_mnist_dataset()
@ -555,4 +522,3 @@ if __name__ == '__main__':
test_serdes_uniform_augment()
skip_test_serdes_fill()
test_serdes_exception()
skip_test_minddataset()

View File

@ -23,8 +23,6 @@ from mindspore import log as logger
from mindspore.mindrecord import FileWriter, FileReader, MindPage, SUCCESS
from mindspore.mindrecord import ParamValueError, MRMGetMetaError
CV_FILE_NAME = "./imagenet.mindrecord"
NLP_FILE_NAME = "./aclImdb.mindrecord"
FILES_NUM = 4
def remove_one_file(x):
@ -42,20 +40,6 @@ def remove_file(file_name):
x = file_name + str(i) + ".db"
remove_one_file(x)
@pytest.fixture
def fixture_cv_file():
"""add/remove file"""
remove_file(CV_FILE_NAME)
yield "yield_fixture_data"
remove_file(CV_FILE_NAME)
@pytest.fixture
def fixture_nlp_file():
"""add/remove file"""
remove_file(NLP_FILE_NAME)
yield "yield_fixture_data"
remove_file(NLP_FILE_NAME)
def test_cv_file_writer_shard_num_none():
"""test cv file writer when shard num is None."""
with pytest.raises(Exception, match="Shard num is illegal."):
@ -71,29 +55,29 @@ def test_cv_file_writer_shard_num_str():
def test_cv_page_reader_consumer_num_none():
    """
    Feature: MindPage
    Description: build a page reader with None as the consumer number
    Expectation: an exception reporting an illegal consumer number is raised
    """
    expected_error = "Consumer number is illegal."
    with pytest.raises(Exception, match=expected_error):
        MindPage("dummy.mindrecord", None)
def test_cv_page_reader_consumer_num_str():
    """
    Feature: MindPage
    Description: build a page reader with a string as the consumer number
    Expectation: an exception reporting an illegal consumer number is raised
    """
    expected_error = "Consumer number is illegal."
    with pytest.raises(Exception, match=expected_error):
        MindPage("dummy.mindrecord", "2")
def test_nlp_file_reader_consumer_num_none():
    """
    Feature: FileReader
    Description: build a file reader with None as the consumer number
    Expectation: an exception reporting an illegal consumer number is raised
    """
    expected_error = "Consumer number is illegal."
    with pytest.raises(Exception, match=expected_error):
        FileReader("dummy.mindrecord", None)
def test_nlp_file_reader_consumer_num_str():
    """
    Feature: FileReader
    Description: build a file reader with a string as the consumer number
    Expectation: an exception reporting an illegal consumer number is raised
    """
    expected_error = "Consumer number is illegal."
    with pytest.raises(Exception, match=expected_error):
        FileReader("dummy.mindrecord", "4")
def create_cv_mindrecord(files_num):
writer = FileWriter(CV_FILE_NAME, files_num)
def create_cv_mindrecord(files_num, file_name):
writer = FileWriter(file_name, files_num)
data = get_data("../data/mindrecord/testImageNetData/")
cv_schema_json = {"file_name": {"type": "string"},
"label": {"type": "int64"}, "data": {"type": "bytes"}}
@ -104,139 +88,218 @@ def create_cv_mindrecord(files_num):
def test_lack_partition_and_db():
    """
    Feature: FileReader
    Description: open a reader on 'dummy.mindrecord', a path this test never creates
    Expectation: RuntimeError whose message reports an invalid file path
    """
    expected_message = 'Unexpected error. Invalid file, path:'
    with pytest.raises(RuntimeError) as raised:
        reader = FileReader('dummy.mindrecord')
        # only reached if the constructor unexpectedly succeeds
        reader.close()
    assert expected_message in str(raised.value)
def test_lack_db():
    """
    Feature: FileReader
    Description: test file reader when db file does not exist
    Expectation: exception occur
    """
    # PYTEST_CURRENT_TEST has the form "<file>::<test name> (<stage>)"; the
    # test name gives every test its own mindrecord path.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    try:
        create_cv_mindrecord(1, file_name)
        # drop the .db file so only the data file is left for the reader
        os.remove("{}.db".format(file_name))
        with pytest.raises(RuntimeError) as err:
            reader = FileReader(file_name)
            # only reached if the constructor unexpectedly succeeds
            reader.close()
        assert 'Unexpected error. Invalid database file, path:' in str(err.value)
    finally:
        # no fixture cleans up after this test any more, so remove the
        # generated files even when a step above fails
        remove_file(file_name)
def test_lack_some_partition_and_db():
    """
    Feature: FileReader
    Description: test file reader when some partition and db do not exist
    Expectation: exception occur
    """
    # PYTEST_CURRENT_TEST has the form "<file>::<test name> (<stage>)"; the
    # test name gives every test its own mindrecord path.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    try:
        create_cv_mindrecord(4, file_name)
        # partitions are named <file_name>0 .. <file_name>3; drop the last
        # one together with its .db file
        missing_partition = file_name + "3"
        os.remove(missing_partition)
        os.remove("{}.db".format(missing_partition))
        with pytest.raises(RuntimeError) as err:
            reader = FileReader(file_name + "0")
            # only reached if the constructor unexpectedly succeeds
            reader.close()
        assert 'Unexpected error. Invalid file, path:' in str(err.value)
    finally:
        # no fixture cleans up after this test any more, so remove the
        # generated files even when a step above fails
        remove_file(file_name)
def test_lack_some_partition_first():
    """
    Feature: FileReader
    Description: test file reader when first partition does not exist
    Expectation: exception occur
    """
    # PYTEST_CURRENT_TEST has the form "<file>::<test name> (<stage>)"; the
    # test name gives every test its own mindrecord path.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    try:
        create_cv_mindrecord(4, file_name)
        # partitions are named <file_name>0 .. <file_name>3; drop the first
        # one and keep its .db file
        os.remove(file_name + "0")
        with pytest.raises(RuntimeError) as err:
            reader = FileReader(file_name + "0")
            # only reached if the constructor unexpectedly succeeds
            reader.close()
        assert 'Unexpected error. Invalid file, path:' in str(err.value)
    finally:
        # no fixture cleans up after this test any more, so remove the
        # generated files even when a step above fails
        remove_file(file_name)
def test_lack_some_partition_middle():
    """
    Feature: FileReader
    Description: test file reader when a middle partition does not exist
    Expectation: RuntimeError reporting an invalid file path is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    # Remove the second partition; its db file is left in place.
    os.remove("{}".format(paths[1]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(file_name + "0")
        # Only reached when FileReader unexpectedly succeeds.
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)
def test_lack_some_partition_last():
    """
    Feature: FileReader
    Description: test file reader when last partition does not exist
    Expectation: RuntimeError reporting an invalid file path is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    # Remove only the last partition; its db file is left in place.
    os.remove("{}".format(paths[3]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(file_name + "0")
        # Only reached when FileReader unexpectedly succeeds.
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)
def test_mindpage_lack_some_partition():
    """
    Feature: MindPage
    Description: test page reader when some partition does not exist
    Expectation: RuntimeError reporting an invalid file path is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    # Remove the first partition, which is also the one MindPage is opened on.
    os.remove("{}".format(paths[0]))
    with pytest.raises(RuntimeError) as err:
        MindPage(file_name + "0")
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)
def test_lack_some_db():
    """
    Feature: FileReader
    Description: test file reader when some db does not exist
    Expectation: RuntimeError reporting an invalid database file is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    # Remove only the db file of the last partition; the partition itself stays.
    os.remove("{}.db".format(paths[3]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(file_name + "0")
        # Only reached when FileReader unexpectedly succeeds.
        reader.close()
    assert 'Unexpected error. Invalid database file, path:' in str(err.value)
    remove_file(file_name)
def test_invalid_mindrecord():
    """
    Feature: FileReader
    Description: test file reader when the content of mindrecord is illegal
    Expectation: RuntimeError reporting invalid file content is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    # Write 100 filler characters in place of a real mindrecord file.
    with open(file_name, 'w') as f:
        dummy = 's' * 100
        f.write(dummy)
    with pytest.raises(RuntimeError) as err:
        FileReader(file_name)
    assert "Unexpected error. Invalid file content, incorrect file or file header" in str(err.value)
    remove_file(file_name)
def test_invalid_db():
    """
    Feature: FileReader
    Description: test file reader when the content of db is illegal
    Expectation: RuntimeError reporting a failed sql statement is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(1, file_name)
    # Replace the generated db file with plain text.
    os.remove(file_name + ".db")
    with open(file_name + ".db", 'w') as f:
        f.write('just for test')
    with pytest.raises(RuntimeError) as err:
        FileReader(file_name)
    assert "Unexpected error. Failed to execute sql [ SELECT NAME from SHARD_NAME; ], " in str(err.value)
    remove_file(file_name)
def test_overwrite_invalid_mindrecord():
    """
    Feature: FileWriter
    Description: test file writer when overwriting an invalid mindrecord file
    Expectation: RuntimeError reporting that mindrecord files already exist is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    # Pre-create a plain-text file at the path the writer is about to use.
    with open(file_name, 'w') as f:
        f.write('just for test')
    with pytest.raises(RuntimeError) as err:
        create_cv_mindrecord(1, file_name)
    assert 'Unexpected error. Invalid file, Mindrecord files already existed in path:' in str(err.value)
    remove_file(file_name)
def test_overwrite_invalid_db():
    """
    Feature: FileWriter
    Description: test file writer when overwriting an invalid db file
    Expectation: RuntimeError reporting a failed db write is raised
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    # Pre-create a plain-text file at the db path the writer is about to use.
    with open(file_name + '.db', 'w') as f:
        f.write('just for test')
    with pytest.raises(RuntimeError) as err:
        create_cv_mindrecord(1, file_name)
    assert 'Unexpected error. Failed to write data to db.' in str(err.value)
    remove_file(file_name)
def test_read_after_close():
    """
    Feature: FileReader
    Description: test file reader when reading after the reader is closed
    Expectation: no item is returned by get_next
    """
    # Name the mindrecord file after the running test: last ':'-separated
    # field of PYTEST_CURRENT_TEST, cut at the first space.
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(1, file_name)
    reader = FileReader(file_name)
    # Close before iterating so the loop below runs on a closed reader.
    reader.close()
    count = 0
    for index, x in enumerate(reader.get_next()):
        count = count + 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 0
    remove_file(file_name)
def test_file_read_after_read(fixture_cv_file):
"""test file reader when finish read."""
create_cv_mindrecord(1)
reader = FileReader(CV_FILE_NAME)
def test_file_read_after_read():
"""
Feature: FileReader
Description: test file reader when finish read
Expectation: exception occur
"""
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
create_cv_mindrecord(1, file_name)
reader = FileReader(file_name)
count = 0
for index, x in enumerate(reader.get_next()):
assert len(x) == 3
@ -249,25 +312,40 @@ def test_file_read_after_read(fixture_cv_file):
cnt = cnt + 1
logger.info("#item{}: {}".format(index, x))
assert cnt == 0
remove_file(file_name)
def test_cv_file_writer_shard_num_greater_than_1000():
    """
    Feature: FileWriter
    Description: test cv file writer shard number greater than 1000
    Expectation: ParamValueError about the shard number range is raised
    """
    with pytest.raises(ParamValueError) as err:
        # 1001 shards is the value under test.
        FileWriter('dummy.mindrecord', 1001)
    assert 'Shard number should between' in str(err.value)
def test_add_index_without_add_schema():
    """
    Feature: FileWriter
    Description: test add index without adding schema
    Expectation: MRMGetMetaError about missing meta info is raised
    """
    with pytest.raises(MRMGetMetaError) as err:
        fw = FileWriter('dummy.mindrecord')
        # No schema has been added to the writer at this point.
        fw.add_index(["label"])
    assert 'Failed to get meta info' in str(err.value)
def test_mindpage_pageno_pagesize_not_int(fixture_cv_file):
"""test page reader when some partition does not exist."""
create_cv_mindrecord(4)
reader = MindPage(CV_FILE_NAME + "0")
def test_mindpage_pageno_pagesize_not_int():
"""
Feature: MindPage
Description: test page reader when some partition does not exist
Expectation: exception occur
"""
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
create_cv_mindrecord(4, file_name)
reader = MindPage(file_name + "0")
fields = reader.get_category_fields()
assert fields == ['file_name', 'label'], \
'failed on getting candidate category fields.'
@ -293,12 +371,18 @@ def test_mindpage_pageno_pagesize_not_int(fixture_cv_file):
with pytest.raises(RuntimeError, match=r"Unexpected error. Invalid data, "
r"category_id: 99999 must be in the range \[0, 10\]."):
reader.read_at_page_by_id(99999, 0, 1)
remove_file(file_name)
def test_mindpage_filename_not_exist(fixture_cv_file):
"""test page reader when some partition does not exist."""
create_cv_mindrecord(4)
reader = MindPage(CV_FILE_NAME + "0")
def test_mindpage_filename_not_exist():
"""
Feature: FileWrite
Description: test page reader when some partition does not exist
Expectation: exception occur
"""
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
create_cv_mindrecord(4, file_name)
reader = MindPage(file_name + "0")
fields = reader.get_category_fields()
assert fields == ['file_name', 'label'], \
'failed on getting candidate category fields.'
@ -319,11 +403,15 @@ def test_mindpage_filename_not_exist(fixture_cv_file):
with pytest.raises(ParamValueError):
reader.read_at_page_by_name(1, 0, 1)
_ = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
for x in range(FILES_NUM)]
remove_file(file_name)
def test_invalid_schema():
mindrecord_file_name = "test.mindrecord"
"""
Feature: FileWrite
Description: test invalid schema
Expectation: exception occur
"""
mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
writer = FileWriter(mindrecord_file_name)
# string => str
@ -462,7 +550,7 @@ def test_invalid_schema():
os.remove("{}.db".format(mindrecord_file_name))
def test_write_with_invalid_data():
mindrecord_file_name = "test.mindrecord"
mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
# field: file_name => filename
with pytest.raises(RuntimeError, match="Unexpected error. Invalid data, schema count should be positive."):