refactor md ut

This commit is contained in:
parent 9d0d094437
commit d9aa18dfca
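Every hunk below applies one pattern: the module-level mindrecord file names shared by all tests (CV_FILE_NAME, NLP_FILE_NAME, TEMP_FILE, ...) are replaced by a name derived per test from pytest's PYTEST_CURRENT_TEST environment variable. A minimal sketch of that derivation (the helper name is illustrative; the tests inline the expression):

import os

def per_test_file_name():
    # PYTEST_CURRENT_TEST has the form
    # "tests/.../test_x.py::test_name (call)";
    # split(':')[-1] keeps "test_name (call)" and split(' ')[0] keeps
    # "test_name", so each test reads/writes files under its own name.
    return os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]

With unique names, tests that generate mindrecord files in the same working directory no longer clobber one another, and any leftover file points back to the test that produced it.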
@@ -31,20 +31,15 @@ from mindspore.dataset.vision import Inter
 from mindspore.mindrecord import FileWriter
 
 FILES_NUM = 4
-CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord"
-CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord"
-CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord"
 CV_DIR_NAME = "../data/mindrecord/testImageNetData"
-NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord"
-OLD_NLP_FILE_NAME = "../data/mindrecord/testOldVersion/aclImdb.mindrecord"
 NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos"
 NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt"
 
 
 @pytest.fixture
 def add_and_remove_cv_file():
     """add/remove cv file"""
-    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -52,7 +47,7 @@ def add_and_remove_cv_file():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = get_data(CV_DIR_NAME)
         cv_schema_json = {"id": {"type": "int32"},
                           "file_name": {"type": "string"},
@@ -77,7 +72,8 @@ def add_and_remove_cv_file():
 @pytest.fixture
 def add_and_remove_nlp_file():
     """add/remove nlp file"""
-    paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -85,7 +81,7 @@ def add_and_remove_nlp_file():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
         nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
                            "rating": {"type": "float32"},
@@ -117,7 +113,8 @@ def add_and_remove_nlp_file():
 @pytest.fixture
 def add_and_remove_nlp_compress_file():
     """add/remove nlp file"""
-    paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -125,7 +122,7 @@ def add_and_remove_nlp_compress_file():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = []
         for row_id in range(16):
             data.append({
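The fixture hunks above all converge on the same shape; a sketch of it in abbreviated form (schema setup and the exact yield/error handling follow the existing fixtures shown in the diff):

import os
import pytest
from mindspore.mindrecord import FileWriter

FILES_NUM = 4

@pytest.fixture
def add_and_remove_cv_file():
    """add/remove cv file"""
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    try:
        for x in paths:  # clear leftovers from an earlier aborted run
            if os.path.exists(x):
                os.remove(x)
            if os.path.exists("{}.db".format(x)):
                os.remove("{}.db".format(x))
        writer = FileWriter(file_name, FILES_NUM)
        # ... add_schema / write_raw_data / commit as in the hunks above ...
        yield "yield_cv_data"
        for x in paths:  # remove what this test wrote
            os.remove("{}".format(x))
            os.remove("{}.db".format(x))
    except Exception as error:
        raise error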
@@ -183,8 +180,9 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
             "array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1])
         })
     num_readers = 1
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     data_set = ds.MindDataset(
-        NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
+        file_name + "0", None, num_readers, shuffle=False)
     assert data_set.get_dataset_size() == 16
     num_iter = 0
     for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)):
@@ -197,29 +195,10 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
     assert num_iter == 16
 
 
-def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file):
-    """tutorial for nlp minderdataset."""
-    num_readers = 1
-    data_set = ds.MindDataset(
-        NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
-    old_data_set = ds.MindDataset(
-        OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
-    assert old_data_set.get_dataset_size() == 16
-    num_iter = 0
-    for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True),
-                       data_set.create_dict_iterator(num_epochs=1, output_numpy=True)):
-        assert (item["array_a"] == x["array_a"]).all()
-        assert (item["array_b"] == x["array_b"]).all()
-        assert (item["array_c"] == x["array_c"]).all()
-        assert (item["array_d"] == x["array_d"]).all()
-        assert item["label"] == x["label"]
-        num_iter += 1
-    assert num_iter == 16
-
-
 def test_cv_minddataset_writer_tutorial():
     """tutorial for cv dataset writer."""
-    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -227,7 +206,7 @@ def test_cv_minddataset_writer_tutorial():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = get_data(CV_DIR_NAME)
         cv_schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"},
                           "data": {"type": "bytes"}}
@@ -250,10 +229,11 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards, shard_id=partition_id)
             num_iter = 0
             for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -272,10 +252,11 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id, num_samples=1)
 
@@ -297,10 +278,11 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id, num_samples=2)
 
@@ -322,10 +304,11 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, expect):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id, num_samples=3)
 
@@ -346,8 +329,9 @@ def test_cv_minddataset_partition_num_samples_3(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5)
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, num_shards=1, shard_id=0, num_samples=5)
 
     assert data_set.get_dataset_size() == 5
     num_iter = 0
@@ -366,9 +350,10 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_cv_file):
     epoch1 = []
     epoch2 = []
     epoch3 = []
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     for partition_id in range(num_shards):
-        data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+        data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                   num_shards=num_shards, shard_id=partition_id)
 
         data_set = data_set.repeat(3)
@@ -401,13 +386,14 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoch(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     num_shards = 3
     epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]],  # save partition 0 result
                     [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]],  # save partition 1 result
                     [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]]  # svae partition 2 result
 
     for partition_id in range(num_shards):
-        data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+        data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                   num_shards=num_shards, shard_id=partition_id)
 
         data_set = data_set.repeat(3)
@@ -436,13 +422,14 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     ds.config.set_seed(54321)
     epoch1 = []
     epoch2 = []
     epoch3 = []
 
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     data_set = data_set.repeat(3)
 
     num_iter = 0
@@ -468,7 +455,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
     epoch2_new_dataset = []
     epoch3_new_dataset = []
 
-    data_set2 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    data_set2 = ds.MindDataset(file_name + "0", columns_list, num_readers)
     data_set2 = data_set2.repeat(3)
 
     num_iter = 0
@@ -499,7 +486,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
     epoch2_new_dataset2 = []
     epoch3_new_dataset2 = []
 
-    data_set3 = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    data_set3 = ds.MindDataset(file_name + "0", columns_list, num_readers)
     data_set3 = data_set3.repeat(3)
 
     num_iter = 0
@@ -530,7 +517,8 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     assert data_set.get_dataset_size() == 10
     repeat_num = 2
     data_set = data_set.repeat(repeat_num)
@@ -544,7 +532,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
             "-------------- item[data]: {} ----------------------".format(item["data"]))
         num_iter += 1
     assert num_iter == 20
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               num_shards=4, shard_id=3)
     assert data_set.get_dataset_size() == 3
 
@@ -553,7 +541,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     decode_op = vision.Decode()
     data_set = data_set.map(
         input_columns=["data"], operations=decode_op, num_parallel_workers=2)
@@ -584,7 +573,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
     """tutorial for cv minddataset."""
     columns_list = ["data", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     decode_op = vision.Decode()
     data_set = data_set.map(
         input_columns=["data"], operations=decode_op, num_parallel_workers=2)
@@ -608,7 +598,8 @@ def test_cv_minddataset_issue_888(add_and_remove_cv_file):
     """issue 888 test."""
     columns_list = ["data", "label"]
     num_readers = 2
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, shuffle=False, num_shards=5, shard_id=1)
     data_set = data_set.shuffle(2)
     data_set = data_set.repeat(9)
     num_iter = 0
@@ -621,7 +612,8 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset([CV_FILE_NAME + str(x)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset([file_name + str(x)
                                for x in range(FILES_NUM)], columns_list, num_readers)
     assert data_set.get_dataset_size() == 10
     num_iter = 0
@@ -644,7 +636,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset([file_name + "0"], columns_list, num_readers)
     assert data_set.get_dataset_size() < 10
     num_iter = 0
     for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -664,6 +657,8 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
 
 def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
+    CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_1.mindrecord"
+    CV2_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_2.mindrecord"
     try:
         if os.path.exists(CV1_FILE_NAME):
             os.remove(CV1_FILE_NAME)
@@ -696,7 +691,8 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
     writer.commit()
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME],
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset([file_name + str(x) for x in range(FILES_NUM)] + [CV1_FILE_NAME, CV2_FILE_NAME],
                               columns_list, num_readers)
     assert data_set.get_dataset_size() == 30
     num_iter = 0
@@ -735,6 +731,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
 
 
 def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
+    CV1_FILE_NAME = "../data/mindrecord/test_cv_minddataset_reader_two_dataset_partition_1"
     paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
@@ -756,7 +753,8 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
 
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset([CV_FILE_NAME + str(x) for x in range(2)] +
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset([file_name + str(x) for x in range(2)] +
                               [CV1_FILE_NAME + str(x) for x in range(2, 4)],
                               columns_list, num_readers)
     assert data_set.get_dataset_size() < 20
@@ -789,7 +787,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     assert data_set.get_dataset_size() == 10
     num_iter = 0
     for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -810,7 +809,8 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
 def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
     """tutorial for nlp minderdataset."""
     num_readers = 4
-    data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", None, num_readers)
     assert data_set.get_dataset_size() == 10
     num_iter = 0
     for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -839,7 +839,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     assert data_set.get_dataset_size() == 10
     for _ in range(5):
         num_iter = 0
@@ -855,7 +856,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
 
     resize_height = 32
     resize_width = 32
@@ -881,7 +883,8 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_cv_file):
 
 def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
-    data_set = ds.MindDataset(CV_FILE_NAME + "0")
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0")
     assert data_set.get_dataset_size() == 10
     num_iter = 0
     for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -903,7 +906,8 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers)
     repeat_num = 2
     data_set = data_set.repeat(repeat_num)
     num_iter = 0
@@ -1753,7 +1757,8 @@ def test_write_with_multi_array_and_MindDataset():
 
 
 def test_numpy_generic():
-    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -1761,7 +1766,7 @@ def test_numpy_generic():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         cv_schema_json = {"label1": {"type": "int32"}, "label2": {"type": "int64"},
                           "label3": {"type": "float32"}, "label4": {"type": "float64"}}
         data = []
@@ -1777,7 +1782,7 @@ def test_numpy_generic():
     writer.commit()
 
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False)
+    data_set = ds.MindDataset(file_name + "0", None, num_readers, shuffle=False)
     assert data_set.get_dataset_size() == 10
     idx = 0
     for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -1799,7 +1804,7 @@ def test_numpy_generic():
 
 
 def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset():
-    mindrecord_file_name = "test.mindrecord"
+    mindrecord_file_name = "test_write_with_float32_float64_float32_array_float64_array_and_MindDataset.mindrecord"
     try:
         data = [{"float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32),
                  "float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471,
@@ -2570,7 +2575,8 @@ def test_distributed_shuffle_with_multi_epochs(create_multi_mindrecord_files):
 
 def test_field_is_null_numpy():
     """add/remove nlp file"""
-    paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     for x in paths:
         if os.path.exists("{}".format(x)):
@@ -2578,7 +2584,7 @@ def test_field_is_null_numpy():
         if os.path.exists("{}.db".format(x)):
             os.remove("{}.db".format(x))
 
-    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+    writer = FileWriter(file_name, FILES_NUM)
     data = []
     # field array_d is null
     for row_id in range(16):
@@ -2607,7 +2613,7 @@ def test_field_is_null_numpy():
     writer.write_raw_data(data)
     writer.commit()
 
-    data_set = ds.MindDataset(dataset_file=NLP_FILE_NAME + "0",
+    data_set = ds.MindDataset(dataset_file=file_name + "0",
                               columns_list=["label", "array_a", "array_b", "array_d"],
                               num_parallel_workers=2,
                               shuffle=False)
@@ -2639,8 +2645,9 @@ def test_for_loop_dataset_iterator(add_and_remove_nlp_compress_file):
             "array_d": np.reshape(np.array([[-10, -127], [10, 127]]), [2, -1])
         })
     num_readers = 1
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     data_set = ds.MindDataset(
-        NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
+        file_name + "0", None, num_readers, shuffle=False)
     assert data_set.get_dataset_size() == 16
 
     # create_dict_iterator in for loop
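A note on the naming that both the fixtures and the readers rely on: FileWriter(file_name, FILES_NUM) splits a dataset into FILES_NUM shard files named by appending the shard index to the base name, each with a companion .db index file. That is why the cleanup lists are built with str(x).rjust(1, '0') and why the readers open file_name + "0": one shard is enough for MindDataset to locate its siblings. With an illustrative base name:

file_name = "example_test"  # stands in for the PYTEST_CURRENT_TEST-derived name
FILES_NUM = 4

# FileWriter(file_name, FILES_NUM) writes example_test0 .. example_test3,
# plus index databases example_test0.db .. example_test3.db.
paths = ["{}{}".format(file_name, str(x).rjust(1, '0')) for x in range(FILES_NUM)]
assert paths == ["example_test0", "example_test1", "example_test2", "example_test3"]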
@@ -28,26 +28,22 @@ from mindspore import log as logger
 from mindspore.mindrecord import FileWriter
 
 FILES_NUM = 4
-CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord"
-CV1_FILE_NAME = "../data/mindrecord/imagenet1.mindrecord"
-CV2_FILE_NAME = "../data/mindrecord/imagenet2.mindrecord"
 CV_DIR_NAME = "../data/mindrecord/testImageNetData"
-NLP_FILE_NAME = "../data/mindrecord/aclImdb.mindrecord"
 NLP_FILE_POS = "../data/mindrecord/testAclImdbData/pos"
 NLP_FILE_VOCAB = "../data/mindrecord/testAclImdbData/vocab.txt"
 
 
 @pytest.fixture
 def add_and_remove_cv_file():
     """add/remove cv file"""
-    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
             os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
             os.remove("{}.db".format(x)) if os.path.exists(
                 "{}.db".format(x)) else None
-        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = get_data(CV_DIR_NAME)
         cv_schema_json = {"id": {"type": "int32"},
                           "file_name": {"type": "string"},
@@ -72,7 +68,8 @@ def add_and_remove_cv_file():
 @pytest.fixture
 def add_and_remove_nlp_file():
     """add/remove nlp file"""
-    paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -80,7 +77,7 @@ def add_and_remove_nlp_file():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
         nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
                            "rating": {"type": "float32"},
@@ -118,7 +115,8 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
     padded_sample['label'] = -1
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
     assert data_set.get_dataset_size() == 15
     num_iter = 0
     num_padded_iter = 0
@@ -145,7 +143,8 @@ def test_cv_minddataset_reader_basic_padded_samples_type_cast(add_and_remove_cv_file):
     padded_sample['label'] = -1
     padded_sample['file_name'] = 99999
     num_readers = 4
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers, padded_sample=padded_sample, num_padded=5)
     assert data_set.get_dataset_size() == 15
     num_iter = 0
     num_padded_iter = 0
@@ -173,12 +172,13 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file):
     padded_sample['label'] = -2
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded, dataset_size):
         num_padded_iter = 0
         num_iter = 0
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -213,6 +213,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file):
     padded_sample['label'] = -2
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded, dataset_size):
         repeat_size = 5
@@ -224,7 +225,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file):
             epoch3_shuffle_result = []
             epoch4_shuffle_result = []
             epoch5_shuffle_result = []
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -285,10 +286,11 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file):
     padded_sample['label'] = -2
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -310,10 +312,11 @@ def test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file):
     padded_sample['label'] = -2
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -332,10 +335,11 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file):
     padded_sample.pop('label', None)
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -356,10 +360,11 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file):
     padded_sample['label'] = -2
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
+            data_set = ds.MindDataset(file_name + "0", None, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -380,10 +385,11 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file):
     padded_sample = data[0]
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
+            data_set = ds.MindDataset(file_name + "0", None, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample)
@@ -403,10 +409,11 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file):
     padded_sample = data[0]
     padded_sample['file_name'] = 'dummy.jpg'
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded):
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
+            data_set = ds.MindDataset(file_name + "0", None, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       num_padded=num_padded)
@@ -429,12 +436,13 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file):
     padded_sample['input_ids'] = np.array([-1, -1, -1, -1], dtype=np.int64)
     padded_sample['rating'] = 1.0
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded, dataset_size):
         num_padded_iter = 0
         num_iter = 0
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -470,6 +478,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file):
     padded_sample['rating'] = 1.0
     num_readers = 4
     repeat_size = 3
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded, dataset_size):
         num_padded_iter = 0
@@ -479,7 +488,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file):
             epoch1_shuffle_result = []
             epoch2_shuffle_result = []
             epoch3_shuffle_result = []
-            data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
@@ -534,6 +543,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file):
     padded_sample['rating'] = 1.0
     num_readers = 4
     repeat_size = 3
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
 
     def partitions(num_shards, num_padded, dataset_size):
         num_padded_iter = 0
@@ -542,7 +552,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file):
         epoch_result = [[["" for i in range(dataset_size)] for i in range(repeat_size)] for i in range(num_shards)]
 
         for partition_id in range(num_shards):
-            data_set = ds.MindDataset(NLP_FILE_NAME + "0", columns_list, num_readers,
+            data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                       num_shards=num_shards,
                                       shard_id=partition_id,
                                       padded_sample=padded_sample,
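The sampler tests that follow repeat the same mechanical substitution at every call site. Stripped to its essentials (columns_list, num_readers and sampler as defined in each test), the before/after shape is:

import os
import mindspore.dataset as ds

columns_list = ["data", "file_name", "label"]
num_readers = 4
sampler = ds.PKSampler(2)

# before: every test read the shared imagenet.mindrecord shards
# data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
#                           sampler=sampler)

# after: each test reads the shards its own fixture wrote under its own name
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                          sampler=sampler)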
@@ -25,14 +25,13 @@ from mindspore.dataset.text import to_str
 from mindspore.mindrecord import FileWriter
 
 FILES_NUM = 4
-CV_FILE_NAME = "../data/mindrecord/imagenet.mindrecord"
 CV_DIR_NAME = "../data/mindrecord/testImageNetData"
 
 
 @pytest.fixture
 def add_and_remove_cv_file():
     """add/remove cv file"""
-    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
     try:
         for x in paths:
@@ -40,7 +39,7 @@ def add_and_remove_cv_file():
                 os.remove("{}".format(x))
             if os.path.exists("{}.db".format(x)):
                 os.remove("{}.db".format(x))
-        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        writer = FileWriter(file_name, FILES_NUM)
         data = get_data(CV_DIR_NAME, True)
         cv_schema_json = {"id": {"type": "int32"},
                           "file_name": {"type": "string"},
@@ -66,7 +65,8 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     num_readers = 4
     sampler = ds.PKSampler(2)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", None, num_readers,
                               sampler=sampler)
 
     assert data_set.get_dataset_size() == 6
@@ -86,7 +86,8 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(2)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
 
     assert data_set.get_dataset_size() == 6
@@ -108,7 +109,8 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(3, None, True)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
 
     assert data_set.get_dataset_size() == 9
@@ -129,7 +131,8 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(3, None, True, 'label', 5)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
 
     assert data_set.get_dataset_size() == 5
@@ -150,7 +153,8 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(3, None, True, 'label', 10)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
 
     assert data_set.get_dataset_size() == 9
@@ -171,7 +175,8 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(5, None, True)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 15
     num_iter = 0
@@ -191,7 +196,8 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(5, None, True, 'label', 20)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 15
     num_iter = 0
@@ -211,7 +217,8 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.PKSampler(5, None, True, 'label', 10)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 10
     num_iter = 0
@@ -230,10 +237,11 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     indices = [1, 2, 3, 5, 7]
     samplers = (ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices))
     for sampler in samplers:
-        data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+        data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                   sampler=sampler)
         assert data_set.get_dataset_size() == 5
         num_iter = 0
@@ -255,9 +263,10 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     indices = [1, 2, 2, 5, 7, 9]
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
     for sampler in samplers:
-        data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+        data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                   sampler=sampler)
         assert data_set.get_dataset_size() == 6
         num_iter = 0
@@ -279,9 +288,10 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     indices = []
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
     for sampler in samplers:
-        data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+        data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                   sampler=sampler)
         assert data_set.get_dataset_size() == 0
         num_iter = 0
@@ -304,8 +314,9 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file):
     num_readers = 4
     indices = [1, 2, 4, 11, 13]
     samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    for sampler in samplers:
-        data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+        data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                                   sampler=sampler)
         assert data_set.get_dataset_size() == 5
         num_iter = 0
|
@ -327,8 +338,9 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
|
|||
num_readers = 4
|
||||
indices = [1, 2, 4, -1, -2]
|
||||
samplers = ds.SubsetRandomSampler(indices), ds.SubsetSampler(indices)
|
||||
file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
|
||||
for sampler in samplers:
|
||||
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
|
||||
data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
|
||||
sampler=sampler)
|
||||
assert data_set.get_dataset_size() == 5
|
||||
num_iter = 0
|
||||
|
@@ -350,7 +362,8 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
     sampler = ds.RandomSampler()
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 10
     num_iter = 0
@@ -373,8 +386,9 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
 def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.RandomSampler()
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 10
     ds1 = data_set.repeat(3)
@@ -407,8 +421,9 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
 def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.RandomSampler(replacement=True, num_samples=5)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 5
     num_iter = 0
@@ -428,8 +443,9 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
 def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.RandomSampler(replacement=False, num_samples=2)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 2
     num_iter = 0
@@ -449,8 +465,9 @@ def test_cv_minddataset_random_sampler_replacement_false_1(add_and_remove_cv_file):
 def test_cv_minddataset_random_sampler_replacement_false_2(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.RandomSampler(replacement=False, num_samples=20)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 10
     num_iter = 0
@@ -471,8 +488,9 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.SequentialSampler(1, 4)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     assert data_set.get_dataset_size() == 4
     num_iter = 0
@@ -495,8 +513,9 @@ def test_cv_minddataset_sequential_sampler_offeset(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.SequentialSampler(2, 10)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     dataset_size = data_set.get_dataset_size()
     assert dataset_size == 10
@@ -520,8 +539,9 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     sampler = ds.SequentialSampler(2, 20)
-    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
+    data_set = ds.MindDataset(file_name + "0", columns_list, num_readers,
                               sampler=sampler)
     dataset_size = data_set.get_dataset_size()
     assert dataset_size == 10
@@ -545,7 +565,8 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    d = ds.MindDataset(file_name + "0", columns_list,
                        num_readers, shuffle=False)
     d1, d2 = d.split([8, 2], randomize=False)
     assert d.get_dataset_size() == 10
@@ -581,7 +602,8 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    d = ds.MindDataset(file_name + "0", columns_list,
                        num_readers, shuffle=False)
     d1, d2 = d.split([0.8, 0.2], randomize=False)
     assert d.get_dataset_size() == 10
@@ -617,7 +639,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    d = ds.MindDataset(file_name + "0", columns_list,
                        num_readers, shuffle=False)
     d1, d2 = d.split([0.41, 0.59], randomize=False)
     assert d.get_dataset_size() == 10
@@ -652,7 +675,8 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
 def test_cv_minddataset_split_deterministic(add_and_remove_cv_file):
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    d = ds.MindDataset(file_name + "0", columns_list,
                        num_readers, shuffle=False)
     # should set seed to avoid data overlap
     ds.config.set_seed(111)
@@ -693,7 +717,8 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file):
     data = get_data(CV_DIR_NAME, True)
     columns_list = ["data", "file_name", "label"]
     num_readers = 4
-    d = ds.MindDataset(CV_FILE_NAME + "0", columns_list,
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    d = ds.MindDataset(file_name + "0", columns_list,
                        num_readers, shuffle=False)
     # should set seed to avoid data overlap
     ds.config.set_seed(111)
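The save-op tests below drop the shared TEMP_FILE/AUTO_FILE constants and the add_remove_file fixture in favour of per-test names plus an explicit remove_file() helper. The resulting shape, sketched and abbreviated (test_case_sketch is an illustrative name; the write/read steps are as in the hunks):

import os

def remove_file(file_name):
    """Remove a mindrecord file and its index database, if present."""
    if os.path.exists("{}".format(file_name)):
        os.remove("{}".format(file_name))
    if os.path.exists("{}.db".format(file_name)):
        os.remove("{}.db".format(file_name))

def test_case_sketch():
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto = './' + file_name + '_auto'  # target of d1.save(...)
    # ... FileWriter(file_name, FILES_NUM); read back with ds.MindDataset;
    #     d1.save(file_name_auto, FILES_NUM) ...
    remove_file(file_name)  # explicit cleanup replaces the old fixture
    remove_file(file_name_auto)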
@@ -23,38 +23,25 @@ import mindspore.dataset as ds
 from mindspore import log as logger
 from mindspore.mindrecord import FileWriter
 
-TEMP_FILE = "../data/mindrecord/testMindDataSet/temp.mindrecord"
-AUTO_FILE = "../data/mindrecord/testMindDataSet/auto.mindrecord"
 TFRECORD_FILES = "../data/mindrecord/testTFRecordData/dummy.tfrecord"
 FILES_NUM = 1
 num_readers = 1
 
 
-@pytest.fixture(name="add_remove_file")
-def fixture_remove():
+def remove_file(file_name):
     """add/remove cv file"""
-    if os.path.exists("{}".format(TEMP_FILE)):
-        os.remove("{}".format(TEMP_FILE))
-    if os.path.exists("{}.db".format(TEMP_FILE)):
-        os.remove("{}.db".format(TEMP_FILE))
+    if os.path.exists("{}".format(file_name)):
+        os.remove("{}".format(file_name))
+    if os.path.exists("{}.db".format(file_name)):
+        os.remove("{}.db".format(file_name))
 
-    if os.path.exists("{}".format(AUTO_FILE)):
-        os.remove("{}".format(AUTO_FILE))
-    if os.path.exists("{}.db".format(AUTO_FILE)):
-        os.remove("{}.db".format(AUTO_FILE))
-    yield "yield_cv_data"
-    if os.path.exists("{}".format(TEMP_FILE)):
-        os.remove("{}".format(TEMP_FILE))
-    if os.path.exists("{}.db".format(TEMP_FILE)):
-        os.remove("{}.db".format(TEMP_FILE))
-
-    if os.path.exists("{}".format(AUTO_FILE)):
-        os.remove("{}".format(AUTO_FILE))
-    if os.path.exists("{}.db".format(AUTO_FILE)):
-        os.remove("{}.db".format(AUTO_FILE))
 
 
-def test_case_00(add_remove_file):  # only bin data
+def test_case_00():
+    """
+    Feature: save op
+    Description: all bin data
+    Expectation: generated mindrecord file
+    """
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     data = [{"image1": bytes("image1 bytes abc", encoding='UTF-8'),
              "image2": bytes("image1 bytes def", encoding='UTF-8'),
             "image3": bytes("image1 bytes ghi", encoding='UTF-8'),
@@ -86,13 +73,16 @@ def test_case_00(add_remove_file):  # only bin data
              "image3": {"type": "bytes"},
              "image4": {"type": "bytes"},
              "image5": {"type": "bytes"}}
-    writer = FileWriter(TEMP_FILE, FILES_NUM)
+    writer = FileWriter(file_name, FILES_NUM)
     writer.add_schema(schema, "schema")
     writer.write_raw_data(data)
     writer.commit()
 
-    d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False)
-    d1.save(AUTO_FILE, FILES_NUM)
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
+    d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
+    d1.save(file_name_auto, FILES_NUM)
     data_value_to_list = []
 
     for item in data:
@@ -104,7 +94,7 @@ def test_case_00(add_remove_file):  # only bin data
         new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8)
         data_value_to_list.append(new_data)
 
-    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
+    d2 = ds.MindDataset(dataset_file=file_name_auto,
                         num_parallel_workers=num_readers,
                         shuffle=False)
     assert d2.get_dataset_size() == 5
@@ -119,9 +109,12 @@ def test_case_00(add_remove_file):  # only bin data
             assert item[field] == data_value_to_list[num_iter][field]
         num_iter += 1
     assert num_iter == 5
+    remove_file(file_name)
+    remove_file(file_name_auto)
 
 
 def test_case_01(add_remove_file):  # only raw data
+    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
     data = [{"file_name": "001.jpg", "label": 43},
             {"file_name": "002.jpg", "label": 91},
             {"file_name": "003.jpg", "label": 61},
@@ -132,13 +125,16 @@ def test_case_01(add_remove_file):  # only raw data
              "label": {"type": "int32"}
              }
 
-    writer = FileWriter(TEMP_FILE, FILES_NUM)
+    writer = FileWriter(file_name, FILES_NUM)
     writer.add_schema(schema, "schema")
     writer.write_raw_data(data)
     writer.commit()
 
-    d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False)
-    d1.save(AUTO_FILE, FILES_NUM)
+    file_name_auto = './'
+    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
+    file_name_auto += '_auto'
+    d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
+    d1.save(file_name_auto, FILES_NUM)
 
     data_value_to_list = []
     for item in data:
@ -147,7 +143,7 @@ def test_case_01(add_remove_file): # only raw data
|
|||
new_data['label'] = np.asarray(list([item["label"]]), dtype=np.int32)
|
||||
data_value_to_list.append(new_data)
|
||||
|
||||
d2 = ds.MindDataset(dataset_file=AUTO_FILE,
|
||||
d2 = ds.MindDataset(dataset_file=file_name_auto,
|
||||
num_parallel_workers=num_readers,
|
||||
shuffle=False)
|
||||
assert d2.get_dataset_size() == 6
|
||||
|
@ -163,9 +159,17 @@ def test_case_01(add_remove_file): # only raw data
|
|||
assert item[field] == data_value_to_list[num_iter][field]
|
||||
num_iter += 1
|
||||
assert num_iter == 6
|
||||
remove_file(file_name)
|
||||
remove_file(file_name_auto)
|
||||
|
||||
|

def test_case_02(add_remove_file): # multi-bytes
def test_case_02(): # multi-bytes
    """
    Feature: save op
    Description: multiple byte fields
    Expectation: generated mindrecord file
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    data = [{"file_name": "001.jpg", "label": 43,
             "float32_array": np.array([1.2, 2.78, 3.1234, 4.9871, 5.12341], dtype=np.float32),
             "float64_array": np.array([48.1234556789, 49.3251241431, 50.13514312414, 51.8971298471,

@@ -258,13 +262,16 @@ def test_case_02(add_remove_file): # multi-bytes
             "label": {"type": "int32"},
             "image4": {"type": "bytes"},
             "image5": {"type": "bytes"}}
    writer = FileWriter(TEMP_FILE, FILES_NUM)
    writer = FileWriter(file_name, FILES_NUM)
    writer.add_schema(schema, "schema")
    writer.write_raw_data(data)
    writer.commit()

    d1 = ds.MindDataset(TEMP_FILE, None, num_readers, shuffle=False)
    d1.save(AUTO_FILE, FILES_NUM)
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'
    d1 = ds.MindDataset(file_name, None, num_readers, shuffle=False)
    d1.save(file_name_auto, FILES_NUM)
    data_value_to_list = []

    for item in data:

@@ -284,7 +291,7 @@ def test_case_02(add_remove_file): # multi-bytes
        new_data['image5'] = np.asarray(list(item["image5"]), dtype=np.uint8)
        data_value_to_list.append(new_data)

    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
    d2 = ds.MindDataset(dataset_file=file_name_auto,
                        num_parallel_workers=num_readers,
                        shuffle=False)
    assert d2.get_dataset_size() == 6

@@ -303,6 +310,8 @@ def test_case_02(add_remove_file): # multi-bytes
            assert item[field] == data_value_to_list[num_iter][field]
        num_iter += 1
    assert num_iter == 6
    remove_file(file_name)
    remove_file(file_name_auto)

def generator_1d():

@@ -310,14 +319,21 @@ def generator_1d():
        yield (np.array([i]),)


def test_case_03(add_remove_file):

def test_case_03():
    """
    Feature: save op
    Description: 1D numpy array
    Expectation: generated mindrecord file
    """
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'
    # apply dataset operations
    d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)

    d1.save(AUTO_FILE)
    d1.save(file_name_auto)

    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
    d2 = ds.MindDataset(dataset_file=file_name_auto,
                        num_parallel_workers=num_readers,
                        shuffle=False)

@@ -327,6 +343,7 @@ def test_case_03(add_remove_file):
        golden = np.array([i])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
    remove_file(file_name_auto)


def generator_with_type(t):

@@ -335,6 +352,9 @@ def generator_with_type(t):


def type_tester(t):
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'
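    # type_tester() runs once per dtype (driven by test_case_04) and reuses the same
    # derived path each time, so the shards are removed at the end of every call
    # before the next dtype is exercised.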
    logger.info("Test with Type {}".format(t.__name__))

    # apply dataset operations

@@ -344,9 +364,9 @@ def type_tester(t):

    data1 = data1.repeat(3)

    data1.save(AUTO_FILE)
    data1.save(file_name_auto)

    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
    d2 = ds.MindDataset(dataset_file=file_name_auto,
                        num_parallel_workers=num_readers,
                        shuffle=False)

@@ -362,10 +382,7 @@ def type_tester(t):
            i = 0
            num_repeat += 1
    assert num_repeat == 3
    if os.path.exists("{}".format(AUTO_FILE)):
        os.remove("{}".format(AUTO_FILE))
    if os.path.exists("{}.db".format(AUTO_FILE)):
        os.remove("{}.db".format(AUTO_FILE))
    remove_file(file_name_auto)


def test_case_04():

@@ -377,20 +394,31 @@ def test_case_04():
        type_tester(t)


def test_case_05(add_remove_file):
def test_case_05():
    """
    Feature: save op
    Description: Exception Test
    Expectation: exception
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]

    d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)

    with pytest.raises(Exception, match="num_files should between 0 and 1000."):
        d1.save(AUTO_FILE, 0)
        d1.save(file_name, 0)


def test_case_06(add_remove_file):

def test_case_06():
    """
    Feature: save op
    Description: Exception Test
    Expectation: exception
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    d1 = ds.GeneratorDataset(generator_1d, ["data"], shuffle=False)

    with pytest.raises(Exception, match="tfrecord dataset format is not supported."):
        d1.save(AUTO_FILE, 1, "tfrecord")
        d1.save(file_name, 1, "tfrecord")

def cast_name(key):

@@ -405,16 +433,20 @@ def cast_name(key):


def test_case_07():
    if os.path.exists("{}".format(AUTO_FILE)):
        os.remove("{}".format(AUTO_FILE))
    if os.path.exists("{}.db".format(AUTO_FILE)):
        os.remove("{}.db".format(AUTO_FILE))
    """
    Feature: save op
    Description: save tfrecord files
    Expectation: generated mindrecord file
    """
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'
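    # The TFRecord pipeline is materialized, saved as mindrecord shards, and read
    # back; cast_name() above presumably normalizes the TFRecord column names so
    # that rows from both formats can be compared key by key below.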
    d1 = ds.TFRecordDataset(TFRECORD_FILES, shuffle=False)
    tf_data = []
    for x in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
        tf_data.append(x)
    d1.save(AUTO_FILE, FILES_NUM)
    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
    d1.save(file_name_auto, FILES_NUM)
    d2 = ds.MindDataset(dataset_file=file_name_auto,
                        num_parallel_workers=num_readers,
                        shuffle=False)
    mr_data = []

@@ -429,11 +461,7 @@ def test_case_07():
            assert v == mr_data[count][cast_name(k)]
        count += 1
    assert count == 10

    if os.path.exists("{}".format(AUTO_FILE)):
        os.remove("{}".format(AUTO_FILE))
    if os.path.exists("{}.db".format(AUTO_FILE)):
        os.remove("{}.db".format(AUTO_FILE))
    remove_file(file_name_auto)

def generator_dynamic_1d():

@@ -461,14 +489,21 @@ def generator_dynamic_2d_1():
        yield (np.arange(10).reshape([5, 2]),)


def test_case_08(add_remove_file):

def test_case_08():
    """
    Feature: save op
    Description: save dynamic 1D numpy array
    Expectation: generated mindrecord file
    """
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'
    # apply dataset operations
    d1 = ds.GeneratorDataset(generator_dynamic_1d, ["data"], shuffle=False)

    d1.save(AUTO_FILE)
    d1.save(file_name_auto)

    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
    d2 = ds.MindDataset(dataset_file=file_name_auto,
                        num_parallel_workers=num_readers,
                        shuffle=False)

@@ -481,16 +516,23 @@ def test_case_08(add_remove_file):
        golden = np.array(arr)
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
    remove_file(file_name_auto)


def test_case_09(add_remove_file):

def test_case_09():
    """
    Feature: save op
    Description: save dynamic 2D numpy array
    Expectation: generated mindrecord file
    """
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'
    # apply dataset operations
    d1 = ds.GeneratorDataset(generator_dynamic_2d_0, ["data"], shuffle=False)

    d1.save(AUTO_FILE)
    d1.save(file_name_auto)

    d2 = ds.MindDataset(dataset_file=AUTO_FILE,
    d2 = ds.MindDataset(dataset_file=file_name_auto,
                        num_parallel_workers=num_readers,
                        shuffle=False)

@@ -502,13 +544,23 @@ def test_case_09(add_remove_file):
        golden = np.arange(10).reshape([2, 5])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
    remove_file(file_name_auto)


def test_case_10(add_remove_file):
def test_case_10():
    """
    Feature: save op
    Description: save 2D Tensor of different shape
    Expectation: Exception
    """
    file_name_auto = './'
    file_name_auto += os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    file_name_auto += '_auto'

    # apply dataset operations
    d1 = ds.GeneratorDataset(generator_dynamic_2d_1, ["data"], shuffle=False)

|
||||
"Error: besides dimension 0, other dimension shape is different from the previous's"):
|
||||
d1.save(AUTO_FILE)
|
||||
d1.save(file_name_auto)
|
||||
remove_file(file_name_auto)
|
||||
|
|
|
@@ -22,7 +22,6 @@ import os
import pytest

import numpy as np
from test_minddataset_sampler import add_and_remove_cv_file, get_data, CV_DIR_NAME, CV_FILE_NAME
from util import config_get_set_num_parallel_workers, config_get_set_seed

import mindspore.common.dtype as mstype

@@ -509,38 +508,6 @@ def delete_json_files():
    except IOError:
        logger.info("Error while deleting: {}".format(f))


# Test save load minddataset
def skip_test_minddataset(add_and_remove_cv_file=True):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 3, 5, 7]
    sampler = ds.SubsetRandomSampler(indices)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
                              sampler=sampler)

    # Serializing into python dictionary
    ds1_dict = ds.serialize(data_set)
    # Serializing into json object
    ds1_json = json.dumps(ds1_dict, sort_keys=True)

    # Reconstruct dataset pipeline from its serialized form
    data_set = ds.deserialize(input_dict=ds1_dict)
    ds2_dict = ds.serialize(data_set)
    # Serializing into json object
    ds2_json = json.dumps(ds2_dict, sort_keys=True)

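    # Round-trip check: deserializing and re-serializing the pipeline should be
    # lossless, so the two JSON dumps are expected to match exactly.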
    assert ds1_json == ds2_json

    _ = get_data(CV_DIR_NAME)
    assert data_set.get_dataset_size() == 5
    num_iter = 0
    for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        num_iter += 1
    assert num_iter == 5


if __name__ == '__main__':
    test_serdes_imagefolder_dataset()
    test_serdes_mnist_dataset()

@@ -555,4 +522,3 @@ if __name__ == '__main__':
    test_serdes_uniform_augment()
    skip_test_serdes_fill()
    test_serdes_exception()
    skip_test_minddataset()

@@ -23,8 +23,6 @@ from mindspore import log as logger
from mindspore.mindrecord import FileWriter, FileReader, MindPage, SUCCESS
from mindspore.mindrecord import ParamValueError, MRMGetMetaError

CV_FILE_NAME = "./imagenet.mindrecord"
NLP_FILE_NAME = "./aclImdb.mindrecord"
FILES_NUM = 4

def remove_one_file(x):

@@ -42,20 +40,6 @@ def remove_file(file_name):
        x = file_name + str(i) + ".db"
        remove_one_file(x)

@pytest.fixture
def fixture_cv_file():
    """add/remove file"""
    remove_file(CV_FILE_NAME)
    yield "yield_fixture_data"
    remove_file(CV_FILE_NAME)

@pytest.fixture
def fixture_nlp_file():
    """add/remove file"""
    remove_file(NLP_FILE_NAME)
    yield "yield_fixture_data"
    remove_file(NLP_FILE_NAME)

def test_cv_file_writer_shard_num_none():
    """test cv file writer when shard num is None."""
    with pytest.raises(Exception, match="Shard num is illegal."):

@@ -71,29 +55,29 @@ def test_cv_file_writer_shard_num_str():
def test_cv_page_reader_consumer_num_none():
    """test cv page reader when consumer number is None."""
    with pytest.raises(Exception, match="Consumer number is illegal."):
        MindPage(CV_FILE_NAME + "0", None)
        MindPage("dummy.mindrecord", None)


def test_cv_page_reader_consumer_num_str():
    """test cv page reader when consumer number is string."""
    with pytest.raises(Exception, match="Consumer number is illegal."):
        MindPage(CV_FILE_NAME + "0", "2")
        MindPage("dummy.mindrecord", "2")


def test_nlp_file_reader_consumer_num_none():
    """test nlp file reader when consumer number is None."""
    with pytest.raises(Exception, match="Consumer number is illegal."):
        FileReader(NLP_FILE_NAME + "0", None)
        FileReader("dummy.mindrecord", None)


def test_nlp_file_reader_consumer_num_str():
    """test nlp file reader when consumer number is string."""
    with pytest.raises(Exception, match="Consumer number is illegal."):
        FileReader(NLP_FILE_NAME + "0", "4")
        FileReader("dummy.mindrecord", "4")

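# The output prefix is now supplied by each caller, so every test writes and
# cleans up its own files instead of sharing the module-level CV_FILE_NAME.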
def create_cv_mindrecord(files_num):
    writer = FileWriter(CV_FILE_NAME, files_num)
def create_cv_mindrecord(files_num, file_name):
    writer = FileWriter(file_name, files_num)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int64"}, "data": {"type": "bytes"}}

@@ -104,139 +88,218 @@ def create_cv_mindrecord(files_num):

def test_lack_partition_and_db():
    """test file reader when mindrecord file does not exist."""
    """
    Feature: FileReader
    Description: test file reader when mindrecord file does not exist
    Expectation: exception occurs
    """
    with pytest.raises(RuntimeError) as err:
        reader = FileReader('dummy.mindrecord')
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)

def test_lack_db(fixture_cv_file):
    """test file reader when db file does not exist."""
    create_cv_mindrecord(1)
    os.remove("{}.db".format(CV_FILE_NAME))
def test_lack_db():
    """
    Feature: FileReader
    Description: test file reader when db file does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(1, file_name)
    os.remove("{}.db".format(file_name))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(CV_FILE_NAME)
        reader = FileReader(file_name)
        reader.close()
    assert 'Unexpected error. Invalid database file, path:' in str(err.value)
    remove_file(file_name)

def test_lack_some_partition_and_db(fixture_cv_file):
    """test file reader when some partition and db do not exist."""
    create_cv_mindrecord(4)
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
def test_lack_some_partition_and_db():
    """
    Feature: FileReader
    Description: test file reader when some partition and db do not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    os.remove("{}".format(paths[3]))
    os.remove("{}.db".format(paths[3]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(CV_FILE_NAME + "0")
        reader = FileReader(file_name + "0")
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)

def test_lack_some_partition_first(fixture_cv_file):
    """test file reader when first partition does not exist."""
    create_cv_mindrecord(4)
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
def test_lack_some_partition_first():
    """
    Feature: FileReader
    Description: test file reader when first partition does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    os.remove("{}".format(paths[0]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(CV_FILE_NAME + "0")
        reader = FileReader(file_name + "0")
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)

def test_lack_some_partition_middle(fixture_cv_file):
    """test file reader when some partition does not exist."""
    create_cv_mindrecord(4)
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
def test_lack_some_partition_middle():
    """
    Feature: FileReader
    Description: test file reader when some partition does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    os.remove("{}".format(paths[1]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(CV_FILE_NAME + "0")
        reader = FileReader(file_name + "0")
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)

def test_lack_some_partition_last(fixture_cv_file):
    """test file reader when last partition does not exist."""
    create_cv_mindrecord(4)
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
def test_lack_some_partition_last():
    """
    Feature: FileReader
    Description: test file reader when last partition does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    os.remove("{}".format(paths[3]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(CV_FILE_NAME + "0")
        reader = FileReader(file_name + "0")
        reader.close()
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)

def test_mindpage_lack_some_partition(fixture_cv_file):
    """test page reader when some partition does not exist."""
    create_cv_mindrecord(4)
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
def test_mindpage_lack_some_partition():
    """
    Feature: MindPage
    Description: test page reader when some partition does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    os.remove("{}".format(paths[0]))
    with pytest.raises(RuntimeError) as err:
        MindPage(CV_FILE_NAME + "0")
        MindPage(file_name + "0")
    assert 'Unexpected error. Invalid file, path:' in str(err.value)
    remove_file(file_name)

def test_lack_some_db(fixture_cv_file):
    """test file reader when some db does not exist."""
    create_cv_mindrecord(4)
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
def test_lack_some_db():
    """
    Feature: FileReader
    Description: test file reader when some db does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    os.remove("{}.db".format(paths[3]))
    with pytest.raises(RuntimeError) as err:
        reader = FileReader(CV_FILE_NAME + "0")
        reader = FileReader(file_name + "0")
        reader.close()
    assert 'Unexpected error. Invalid database file, path:' in str(err.value)

    remove_file(file_name)

def test_invalid_mindrecord():
    """test file reader when the content of mindrecord is illegal."""
    with open(CV_FILE_NAME, 'w') as f:
    """
    Feature: FileReader
    Description: test file reader when the content of mindrecord is illegal
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    with open(file_name, 'w') as f:
        dummy = 's' * 100
        f.write(dummy)
    with pytest.raises(RuntimeError) as err:
        FileReader(CV_FILE_NAME)
        FileReader(file_name)
    assert "Unexpected error. Invalid file content, incorrect file or file header" in str(err.value)
    os.remove(CV_FILE_NAME)
    remove_file(file_name)

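# Replacing the .db sidecar with junk leaves the mindrecord file intact but
# corrupts the index, so opening the file is expected to fail on the first
# metadata SQL query, as asserted below.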
def test_invalid_db(fixture_cv_file):
    """test file reader when the content of db is illegal."""
    create_cv_mindrecord(1)
    os.remove("imagenet.mindrecord.db")
    with open('imagenet.mindrecord.db', 'w') as f:
def test_invalid_db():
    """
    Feature: FileReader
    Description: test file reader when the content of db is illegal
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(1, file_name)
    os.remove(file_name + ".db")
    with open(file_name + ".db", 'w') as f:
        f.write('just for test')
    with pytest.raises(RuntimeError) as err:
        FileReader('imagenet.mindrecord')
        FileReader(file_name)
    assert "Unexpected error. Failed to execute sql [ SELECT NAME from SHARD_NAME; ], " in str(err.value)
    remove_file(file_name)

def test_overwrite_invalid_mindrecord(fixture_cv_file):
    """test file writer when overwrite invalid mindrecord file."""
    with open(CV_FILE_NAME, 'w') as f:
def test_overwrite_invalid_mindrecord():
    """
    Feature: FileWriter
    Description: test file writer when overwrite invalid mindrecord file
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    with open(file_name, 'w') as f:
        f.write('just for test')
    with pytest.raises(RuntimeError) as err:
        create_cv_mindrecord(1)
        create_cv_mindrecord(1, file_name)
    assert 'Unexpected error. Invalid file, Mindrecord files already existed in path:' in str(err.value)
    remove_file(file_name)

def test_overwrite_invalid_db(fixture_cv_file):
    """test file writer when overwrite invalid db file."""
    with open('imagenet.mindrecord.db', 'w') as f:
def test_overwrite_invalid_db():
    """
    Feature: FileWriter
    Description: test file writer when overwrite invalid db file
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    with open(file_name + '.db', 'w') as f:
        f.write('just for test')
    with pytest.raises(RuntimeError) as err:
        create_cv_mindrecord(1)
        create_cv_mindrecord(1, file_name)
    assert 'Unexpected error. Failed to write data to db.' in str(err.value)
    remove_file(file_name)

def test_read_after_close(fixture_cv_file):
    """test file reader when close read."""
    create_cv_mindrecord(1)
    reader = FileReader(CV_FILE_NAME)
def test_read_after_close():
    """
    Feature: FileReader
    Description: test file reader when close read
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(1, file_name)
    reader = FileReader(file_name)
    reader.close()
    count = 0
    for index, x in enumerate(reader.get_next()):
        count = count + 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 0
    remove_file(file_name)

def test_file_read_after_read(fixture_cv_file):
    """test file reader when finish read."""
    create_cv_mindrecord(1)
    reader = FileReader(CV_FILE_NAME)
def test_file_read_after_read():
    """
    Feature: FileReader
    Description: test file reader when finish read
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(1, file_name)
    reader = FileReader(file_name)
    count = 0
    for index, x in enumerate(reader.get_next()):
        assert len(x) == 3

@@ -249,25 +312,40 @@ def test_file_read_after_read(fixture_cv_file):
        cnt = cnt + 1
        logger.info("#item{}: {}".format(index, x))
    assert cnt == 0
    remove_file(file_name)


def test_cv_file_writer_shard_num_greater_than_1000():
    """test cv file writer shard number greater than 1000."""
    """
    Feature: FileWriter
    Description: test cv file writer shard number greater than 1000
    Expectation: exception occurs
    """
    with pytest.raises(ParamValueError) as err:
        FileWriter(CV_FILE_NAME, 1001)
        FileWriter('dummy.mindrecord', 1001)
    assert 'Shard number should between' in str(err.value)


def test_add_index_without_add_schema():
    """
    Feature: FileWriter
    Description: test add index without adding schema
    Expectation: exception occurs
    """
    with pytest.raises(MRMGetMetaError) as err:
        fw = FileWriter(CV_FILE_NAME)
        fw = FileWriter('dummy.mindrecord')
        fw.add_index(["label"])
    assert 'Failed to get meta info' in str(err.value)

def test_mindpage_pageno_pagesize_not_int(fixture_cv_file):
    """test page reader when some partition does not exist."""
    create_cv_mindrecord(4)
    reader = MindPage(CV_FILE_NAME + "0")
def test_mindpage_pageno_pagesize_not_int():
    """
    Feature: MindPage
    Description: test page reader when some partition does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    reader = MindPage(file_name + "0")
    fields = reader.get_category_fields()
    assert fields == ['file_name', 'label'], \
        'failed on getting candidate category fields.'

@@ -293,12 +371,18 @@ def test_mindpage_pageno_pagesize_not_int(fixture_cv_file):
    with pytest.raises(RuntimeError, match=r"Unexpected error. Invalid data, "
                       r"category_id: 99999 must be in the range \[0, 10\]."):
        reader.read_at_page_by_id(99999, 0, 1)
    remove_file(file_name)


def test_mindpage_filename_not_exist(fixture_cv_file):
    """test page reader when some partition does not exist."""
    create_cv_mindrecord(4)
    reader = MindPage(CV_FILE_NAME + "0")
def test_mindpage_filename_not_exist():
    """
    Feature: MindPage
    Description: test page reader when some partition does not exist
    Expectation: exception occurs
    """
    file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    create_cv_mindrecord(4, file_name)
    reader = MindPage(file_name + "0")
    fields = reader.get_category_fields()
    assert fields == ['file_name', 'label'], \
        'failed on getting candidate category fields.'

@@ -319,11 +403,15 @@ def test_mindpage_filename_not_exist(fixture_cv_file):
    with pytest.raises(ParamValueError):
        reader.read_at_page_by_name(1, 0, 1)

    _ = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
         for x in range(FILES_NUM)]
    remove_file(file_name)

def test_invalid_schema():
    mindrecord_file_name = "test.mindrecord"
    """
    Feature: FileWriter
    Description: test invalid schema
    Expectation: exception occurs
    """
    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]
    writer = FileWriter(mindrecord_file_name)

    # string => str

@@ -462,7 +550,7 @@ def test_invalid_schema():
    os.remove("{}.db".format(mindrecord_file_name))

def test_write_with_invalid_data():
    mindrecord_file_name = "test.mindrecord"
    mindrecord_file_name = os.environ.get('PYTEST_CURRENT_TEST').split(':')[-1].split(' ')[0]

    # field: file_name => filename
    with pytest.raises(RuntimeError, match="Unexpected error. Invalid data, schema count should be positive."):