mindspore/tests/ut/python/dataset/test_datasets_imagenet_dist...

102 lines
3.6 KiB
Python

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pytest
import mindspore.dataset as ds
from mindspore import log as logger
DATA_DIR = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data",
"../data/dataset/test_tf_file_3_images2/train-0000-of-0002.data",
"../data/dataset/test_tf_file_3_images2/train-0000-of-0003.data",
"../data/dataset/test_tf_file_3_images2/train-0000-of-0004.data"]
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images2/datasetSchema.json"
DISTRIBUTION_ALL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionAll.json"
DISTRIBUTION_UNIQUE_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionUnique.json"
DISTRIBUTION_RANDOM_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionRandom.json"
DISTRIBUTION_EQUAL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionEqualRows.json"
def test_tf_file_normal():
# apply dataset operations
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
data1 = data1.repeat(1)
num_iter = 0
for item in data1.create_dict_iterator(): # each data is a dictionary
num_iter += 1
logger.info("Number of data in data1: {}".format(num_iter))
assert num_iter == 12
def test_tf_file_distribution_all():
# apply dataset operations
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_ALL_DIR)
data1 = data1.repeat(2)
num_iter = 0
for item in data1.create_dict_iterator(): # each data is a dictionary
num_iter += 1
logger.info("Number of data in data1: {}".format(num_iter))
assert num_iter == 24
def test_tf_file_distribution_unique():
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_UNIQUE_DIR)
data1 = data1.repeat(1)
num_iter = 0
for item in data1.create_dict_iterator(): # each data is a dictionary
num_iter += 1
logger.info("Number of data in data1: {}".format(num_iter))
assert num_iter == 4
def test_tf_file_distribution_random():
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_RANDOM_DIR)
data1 = data1.repeat(1)
num_iter = 0
for item in data1.create_dict_iterator(): # each data is a dictionary
num_iter += 1
logger.info("Number of data in data1: {}".format(num_iter))
assert num_iter == 4
def test_tf_file_distribution_equal_rows():
data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_EQUAL_DIR)
data1 = data1.repeat(2)
num_iter = 0
for item in data1.create_dict_iterator(): # each data is a dictionary
num_iter += 1
assert num_iter == 4
if __name__ == '__main__':
logger.info('=======test normal=======')
test_tf_file_normal()
logger.info('=======test all=======')
test_tf_file_distribution_all()
logger.info('=======test unique=======')
test_tf_file_distribution_unique()
logger.info('=======test random=======')
test_tf_file_distribution_random()
logger.info('=======test equal rows=======')
test_tf_file_distribution_equal_rows()