# Origin: mindspore/tests/ut/python/dataset/test_repeat.py
# (repository listing metadata: 343 lines, 10 KiB, Python)

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Test Repeat Op
"""
import numpy as np
import pytest
import mindspore.dataset as ds
import mindspore.dataset.vision as vision
from mindspore import log as logger
from util import save_and_check_dict
# TFRecord data file list and schema for the "testTFTestAllTypes" test dataset;
# passed as the first two arguments of ds.TFRecordDataset in the simple repeat tests.
DATA_DIR_TF = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_DIR_TF = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
# TFRecord data file list and schema for the "test_tf_file_3_images" test dataset;
# the tests below decode and resize its "image" column.
DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
SCHEMA_DIR_TF2 = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
# Forwarded as `generate_golden` to save_and_check_dict.
# NOTE(review): presumably True regenerates the golden .npz files instead of
# comparing against them -- confirm against util.save_and_check_dict.
GENERATE_GOLDEN = False
def test_tf_repeat_01():
    """
    Test a simple repeat operation.

    Reads the TFRecord test data without shuffling, repeats it twice and
    hands the pipeline to save_and_check_dict together with the result
    file name "repeat_result.npz".
    """
    logger.info("Test Simple Repeat")

    # build the pipeline: source -> repeat(2)
    source = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, shuffle=False)
    repeated = source.repeat(2)

    save_and_check_dict(repeated, "repeat_result.npz", generate_golden=GENERATE_GOLDEN)
def test_tf_repeat_02():
    """
    Test Infinite Repeat.

    A repeat count of -1 is applied and the pipeline is iterated until
    100 rows have been seen, at which point the loop is left explicitly.
    """
    logger.info("Test Infinite Repeat")

    # build the pipeline: source -> repeat(-1)
    pipeline = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, shuffle=False).repeat(-1)

    # count rows starting at 1 so the counter equals the number of rows consumed
    rows_seen = 0
    for rows_seen, _ in enumerate(pipeline, start=1):
        if rows_seen == 100:
            break

    assert rows_seen == 100
def test_tf_repeat_03():
    """
    Test Repeat then Batch.

    The image column is decoded and resized to 32x32, the dataset is repeated
    22 times and then batched by 32 with the remainder dropped. The expected
    count of 2 batches is consistent with a 3-row source (3 * 22 = 66 rows).
    """
    logger.info("Test Repeat then Batch")

    decode_op = vision.Decode()
    resize_op = vision.Resize((32, 32), interpolation=ds.transforms.vision.Inter.LINEAR)

    pipeline = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)
    pipeline = pipeline.map(operations=decode_op, input_columns=["image"])
    pipeline = pipeline.map(operations=resize_op, input_columns=["image"])
    pipeline = pipeline.repeat(22).batch(32, drop_remainder=True)

    # one full pass over the pipeline, counting the batches it produces
    num_iter = sum(1 for _ in pipeline.create_dict_iterator(num_epochs=1))

    logger.info("Number of tf data in data1: {}".format(num_iter))
    assert num_iter == 2
def test_tf_repeat_04():
    """
    Test a simple repeat operation with column list.

    Only the columns "col_sint64" and "col_sint32" are requested from the
    TFRecord source; the dataset is repeated twice and handed to
    save_and_check_dict with the result file name "repeat_list_result.npz".
    """
    logger.info("Test Simple Repeat Column List")

    selected_columns = ["col_sint64", "col_sint32"]

    # build the pipeline: source (selected columns) -> repeat(2)
    source = ds.TFRecordDataset(DATA_DIR_TF, SCHEMA_DIR_TF, columns_list=selected_columns, shuffle=False)
    repeated = source.repeat(2)

    save_and_check_dict(repeated, "repeat_list_result.npz", generate_golden=GENERATE_GOLDEN)
def generator():
    """
    Yield three single-column rows holding the values 0, 1 and 2.

    Each row is a one-element tuple containing a NumPy array of shape (1,).

    Yields:
        tuple, `(np.array([i]),)` for i in 0, 1, 2.
    """
    for i in range(3):
        # The tuple must be the operand of `yield`. The previous form,
        # `(yield np.array([i]),)`, yielded the bare array and built a
        # throw-away `(None,)` tuple from the result of the yield expression.
        yield (np.array([i]),)
def test_nested_repeat1():
    """
    Test repeat(2) followed by repeat(3) on the 3-row generator dataset.

    Row values must cycle 0, 1, 2 and a full pass must produce 2 * 3 * 3 rows.
    """
    logger.info("test_nested_repeat1")
    pipeline = ds.GeneratorDataset(generator, ["data"]).repeat(2).repeat(3)

    rows = pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for index, row in enumerate(rows):
        assert index % 3 == row[0][0]

    # second pass over the same pipeline, only counting rows
    total_rows = sum(1 for _ in pipeline)
    assert total_rows == 2 * 3 * 3
def test_nested_repeat2():
    """
    Test repeat(1) followed by repeat(1) on the 3-row generator dataset.

    Row values must cycle 0, 1, 2 and a full pass must produce 3 rows.
    """
    logger.info("test_nested_repeat2")
    pipeline = ds.GeneratorDataset(generator, ["data"]).repeat(1).repeat(1)

    rows = pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for index, row in enumerate(rows):
        assert index % 3 == row[0][0]

    # second pass over the same pipeline, only counting rows
    total_rows = sum(1 for _ in pipeline)
    assert total_rows == 3
def test_nested_repeat3():
    """
    Test repeat(1) followed by repeat(2) on the 3-row generator dataset.

    Row values must cycle 0, 1, 2 and a full pass must produce 2 * 3 rows.
    """
    logger.info("test_nested_repeat3")
    pipeline = ds.GeneratorDataset(generator, ["data"]).repeat(1).repeat(2)

    rows = pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for index, row in enumerate(rows):
        assert index % 3 == row[0][0]

    # second pass over the same pipeline, only counting rows
    total_rows = sum(1 for _ in pipeline)
    assert total_rows == 2 * 3
def test_nested_repeat4():
    """
    Test repeat(2) followed by repeat(1) on the 3-row generator dataset.

    Row values must cycle 0, 1, 2 and a full pass must produce 2 * 3 rows.
    """
    logger.info("test_nested_repeat4")
    pipeline = ds.GeneratorDataset(generator, ["data"]).repeat(2).repeat(1)

    rows = pipeline.create_tuple_iterator(num_epochs=1, output_numpy=True)
    for index, row in enumerate(rows):
        assert index % 3 == row[0][0]

    # second pass over the same pipeline, only counting rows
    total_rows = sum(1 for _ in pipeline)
    assert total_rows == 2 * 3
def test_nested_repeat5():
    """
    Test batch(3) followed by repeat(2) and repeat(3) on the generator dataset.

    Every batch must equal [[0], [1], [2]] and a full pass must produce 6 batches.
    """
    logger.info("test_nested_repeat5")
    pipeline = ds.GeneratorDataset(generator, ["data"]).batch(3).repeat(2).repeat(3)

    expected_batch = [[0], [1], [2]]
    for batch in pipeline:
        np.testing.assert_array_equal(batch[0].asnumpy(), np.asarray(expected_batch))

    # second pass over the same pipeline, only counting batches
    total_batches = sum(1 for _ in pipeline)
    assert total_batches == 6
def test_nested_repeat6():
    """
    Test repeat(2), then batch(3), then repeat(3) on the generator dataset.

    Every batch must equal [[0], [1], [2]] and a full pass must produce 6 batches.
    """
    logger.info("test_nested_repeat6")
    pipeline = ds.GeneratorDataset(generator, ["data"]).repeat(2).batch(3).repeat(3)

    expected_batch = [[0], [1], [2]]
    for batch in pipeline:
        np.testing.assert_array_equal(batch[0].asnumpy(), np.asarray(expected_batch))

    # second pass over the same pipeline, only counting batches
    total_batches = sum(1 for _ in pipeline)
    assert total_batches == 6
def test_nested_repeat7():
    """
    Test repeat(2) and repeat(3) followed by batch(3) on the generator dataset.

    Every batch must equal [[0], [1], [2]] and a full pass must produce 6 batches.
    """
    logger.info("test_nested_repeat7")
    pipeline = ds.GeneratorDataset(generator, ["data"]).repeat(2).repeat(3).batch(3)

    expected_batch = [[0], [1], [2]]
    for batch in pipeline:
        np.testing.assert_array_equal(batch[0].asnumpy(), np.asarray(expected_batch))

    # second pass over the same pipeline, only counting batches
    total_batches = sum(1 for _ in pipeline)
    assert total_batches == 6
def test_nested_repeat8():
    """
    Test batch(2, drop_remainder=False) followed by repeat(2) and repeat(3).

    Batches must alternate between [[0], [1]] (even positions) and the
    remainder batch [[2]] (odd positions); a full pass must produce
    6 * 2 batches.
    """
    logger.info("test_nested_repeat8")
    pipeline = ds.GeneratorDataset(generator, ["data"])
    pipeline = pipeline.batch(2, drop_remainder=False).repeat(2).repeat(3)

    for position, batch in enumerate(pipeline):
        actual = batch[0].asnumpy()
        if position % 2 != 0:
            # odd positions carry the single leftover row
            np.testing.assert_array_equal(actual, np.asarray([[2]]))
        else:
            np.testing.assert_array_equal(actual, np.asarray([[0], [1]]))

    # second pass over the same pipeline, only counting batches
    total_batches = sum(1 for _ in pipeline)
    assert total_batches == 6 * 2
def test_nested_repeat9():
    """
    Test an unbounded repeat() followed by repeat(3) on the generator dataset.

    The first 11 rows are consumed; their values must cycle 0, 1, 2. The loop
    is left explicitly because the pipeline is not expected to end on its own.
    """
    logger.info("test_nested_repeat9")
    data = ds.GeneratorDataset(generator, ["data"])
    data = data.repeat()
    data = data.repeat(3)

    rows_seen = 0
    for i, d in enumerate(data):
        assert i % 3 == d[0].asnumpy()[0]
        rows_seen = i + 1
        if i == 10:
            break

    # Guard against a vacuous pass: if the pipeline ended before row index 10
    # the loop above would finish silently without ever reaching the break.
    assert rows_seen == 11
def test_nested_repeat10():
    """
    Test repeat(3) followed by an unbounded repeat() on the generator dataset.

    The first 11 rows are consumed; their values must cycle 0, 1, 2. The loop
    is left explicitly because the pipeline is not expected to end on its own.
    """
    logger.info("test_nested_repeat10")
    data = ds.GeneratorDataset(generator, ["data"])
    data = data.repeat(3)
    data = data.repeat()

    rows_seen = 0
    for i, d in enumerate(data):
        assert i % 3 == d[0].asnumpy()[0]
        rows_seen = i + 1
        if i == 10:
            break

    # Guard against a vacuous pass: if the pipeline ended before row index 10
    # the loop above would finish silently without ever reaching the break.
    assert rows_seen == 11
def test_nested_repeat11():
    """
    Test four stacked repeats (2, 3, 4, 5) on the 3-row generator dataset.

    Row values must cycle 0, 1, 2 and a full pass must produce
    2 * 3 * 4 * 5 * 3 rows.
    """
    logger.info("test_nested_repeat11")
    pipeline = ds.GeneratorDataset(generator, ["data"])
    pipeline = pipeline.repeat(2).repeat(3).repeat(4).repeat(5)

    for index, row in enumerate(pipeline):
        assert index % 3 == row[0].asnumpy()[0]

    # second pass over the same pipeline, only counting rows
    total_rows = sum(1 for _ in pipeline)
    assert total_rows == 2 * 3 * 4 * 5 * 3
def test_repeat_count1():
    """
    Test get_dataset_size and the iterated batch count for repeat then batch.

    The source must report 3 rows; after repeat(4) and batch(2) without
    dropping the remainder, both the reported size and the number of
    iterated batches must be 6.
    """
    source = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)
    data1_size = source.get_dataset_size()
    logger.info("dataset size is {}".format(data1_size))

    decode_op = vision.Decode()
    resize_op = vision.Resize((32, 32), interpolation=ds.transforms.vision.Inter.LINEAR)

    pipeline = source.map(operations=decode_op, input_columns=["image"])
    pipeline = pipeline.map(operations=resize_op, input_columns=["image"])
    pipeline = pipeline.repeat(4).batch(2, drop_remainder=False)

    dataset_size = pipeline.get_dataset_size()
    logger.info("dataset repeat then batch's size is {}".format(dataset_size))

    # one full pass over the pipeline, counting the batches it produces
    num1_iter = sum(1 for _ in pipeline.create_dict_iterator(num_epochs=1))

    assert data1_size == 3
    assert dataset_size == num1_iter
    assert num1_iter == 6
def test_repeat_count2():
    """
    Test get_dataset_size and the iterated batch count for batch then repeat.

    The source must report 3 rows; after batch(2) without dropping the
    remainder and repeat(4), both the reported size and the number of
    iterated batches must be 8.
    """
    source = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)
    data1_size = source.get_dataset_size()
    logger.info("dataset size is {}".format(data1_size))

    decode_op = vision.Decode()
    resize_op = vision.Resize((32, 32), interpolation=ds.transforms.vision.Inter.LINEAR)

    pipeline = source.map(operations=decode_op, input_columns=["image"])
    pipeline = pipeline.map(operations=resize_op, input_columns=["image"])
    pipeline = pipeline.batch(2, drop_remainder=False).repeat(4)

    dataset_size = pipeline.get_dataset_size()
    logger.info("dataset batch then repeat's size is {}".format(dataset_size))

    # one full pass over the pipeline, counting the batches it produces
    num1_iter = sum(1 for _ in pipeline.create_dict_iterator(num_epochs=1))

    assert data1_size == 3
    assert dataset_size == num1_iter
    assert num1_iter == 8
def test_repeat_count0():
    """
    Test Repeat with invalid count 0.

    A ValueError whose message mentions "count" is expected.
    """
    logger.info("Test Repeat with invalid count 0")

    with pytest.raises(ValueError) as raised:
        source = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)
        source.repeat(0)

    message = str(raised.value)
    assert "count" in message
def test_repeat_countneg2():
    """
    Test Repeat with invalid count -2.

    A ValueError whose message mentions "count" is expected.
    """
    logger.info("Test Repeat with invalid count -2")

    with pytest.raises(ValueError) as raised:
        source = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)
        source.repeat(-2)

    message = str(raised.value)
    assert "count" in message
if __name__ == "__main__":
    # Run every test in this module, in definition order, when executed as a script.
    for _test_case in (
            test_tf_repeat_01,
            test_tf_repeat_02,
            test_tf_repeat_03,
            test_tf_repeat_04,
            test_nested_repeat1,
            test_nested_repeat2,
            test_nested_repeat3,
            test_nested_repeat4,
            test_nested_repeat5,
            test_nested_repeat6,
            test_nested_repeat7,
            test_nested_repeat8,
            test_nested_repeat9,
            test_nested_repeat10,
            test_nested_repeat11,
            test_repeat_count1,
            test_repeat_count2,
            test_repeat_count0,
            test_repeat_countneg2,
    ):
        _test_case()