forked from mindspore-Ecosystem/mindspore
102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
# Copyright 2019 Huawei Technologies Co., Ltd
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
import pytest
|
|
import mindspore.dataset as ds
|
|
from mindspore import log as logger
|
|
|
|
DATA_DIR = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data",
|
|
"../data/dataset/test_tf_file_3_images2/train-0000-of-0002.data",
|
|
"../data/dataset/test_tf_file_3_images2/train-0000-of-0003.data",
|
|
"../data/dataset/test_tf_file_3_images2/train-0000-of-0004.data"]
|
|
|
|
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images2/datasetSchema.json"
|
|
|
|
DISTRIBUTION_ALL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionAll.json"
|
|
DISTRIBUTION_UNIQUE_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionUnique.json"
|
|
DISTRIBUTION_RANDOM_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionRandom.json"
|
|
DISTRIBUTION_EQUAL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionEqualRows.json"
|
|
|
|
|
|
def test_tf_file_normal():
|
|
# apply dataset operations
|
|
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
|
|
data1 = data1.repeat(1)
|
|
num_iter = 0
|
|
for item in data1.create_dict_iterator(): # each data is a dictionary
|
|
num_iter += 1
|
|
|
|
logger.info("Number of data in data1: {}".format(num_iter))
|
|
assert num_iter == 12
|
|
|
|
|
|
def test_tf_file_distribution_all():
|
|
# apply dataset operations
|
|
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_ALL_DIR)
|
|
data1 = data1.repeat(2)
|
|
num_iter = 0
|
|
for item in data1.create_dict_iterator(): # each data is a dictionary
|
|
num_iter += 1
|
|
|
|
logger.info("Number of data in data1: {}".format(num_iter))
|
|
assert num_iter == 24
|
|
|
|
|
|
def test_tf_file_distribution_unique():
|
|
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_UNIQUE_DIR)
|
|
data1 = data1.repeat(1)
|
|
num_iter = 0
|
|
for item in data1.create_dict_iterator(): # each data is a dictionary
|
|
num_iter += 1
|
|
|
|
logger.info("Number of data in data1: {}".format(num_iter))
|
|
assert num_iter == 4
|
|
|
|
|
|
def test_tf_file_distribution_random():
|
|
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_RANDOM_DIR)
|
|
data1 = data1.repeat(1)
|
|
num_iter = 0
|
|
for item in data1.create_dict_iterator(): # each data is a dictionary
|
|
num_iter += 1
|
|
|
|
logger.info("Number of data in data1: {}".format(num_iter))
|
|
assert num_iter == 4
|
|
|
|
|
|
def test_tf_file_distribution_equal_rows():
|
|
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_EQUAL_DIR)
|
|
data1 = data1.repeat(2)
|
|
num_iter = 0
|
|
for item in data1.create_dict_iterator(): # each data is a dictionary
|
|
num_iter += 1
|
|
|
|
assert num_iter == 4
|
|
|
|
|
|
if __name__ == '__main__':
|
|
logger.info('=======test normal=======')
|
|
test_tf_file_normal()
|
|
|
|
logger.info('=======test all=======')
|
|
test_tf_file_distribution_all()
|
|
|
|
logger.info('=======test unique=======')
|
|
test_tf_file_distribution_unique()
|
|
|
|
logger.info('=======test random=======')
|
|
test_tf_file_distribution_random()
|
|
logger.info('=======test equal rows=======')
|
|
test_tf_file_distribution_equal_rows()
|