[MD] Debug Mode - Mappable Op Row Content Order Fix and UTs

This commit is contained in:
Cathy Wong 2023-01-27 21:12:18 -05:00
parent fdb3e202a2
commit 3c1b2676e3
17 changed files with 439 additions and 38 deletions

View File

@ -183,15 +183,16 @@ Status MappableLeafOp::GetNextRowPullMode(TensorRow *const row) {
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
sample_ids_ = sample_row[0];
MS_LOG(DEBUG) << "Set sample_ids_=" << (*sample_ids_);
}
if (curr_row_ + 1 > sample_ids_->Size()) {
*row = TensorRow(TensorRow::kFlagEOE);
RETURN_IF_NOT_OK(ResetAndUpdateRepeat());
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
return Status::OK();
}
int64_t key;
RETURN_IF_NOT_OK(sample_ids_->GetItemAt(&key, {curr_row_}));
MS_LOG(DEBUG) << "Got key=" << key << " with curr_row_=" << curr_row_;
RETURN_IF_NOT_OK(LoadTensorRowPullMode(key, row));
curr_row_++;
return Status::OK();
@ -200,6 +201,12 @@ Status MappableLeafOp::GetNextRowPullMode(TensorRow *const row) {
Status MappableLeafOp::ResetAndUpdateRepeat() {
if (!IsLastIteration()) {
RETURN_IF_NOT_OK(Reset());
TensorRow sample_row;
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
// Get sample_ids
sample_ids_ = sample_row[0];
MS_LOG(DEBUG) << "Set sample_ids_=" << (*sample_ids_);
UpdateRepeatAndEpochCounter();
} else {
eof_handled_ = true;

View File

@ -48,7 +48,7 @@ TEST_F(MindDataTestPipeline, TestCelebADataset) {
ASSERT_OK(iter->GetNextRow(&row));
// Check if CelebA() read correct images/attr
std::string expect_file[] = {"1.JPEG", "2.jpg"};
std::string expect_file[] = {"1_apple.JPEG", "2_banana.jpg"};
std::vector<std::vector<uint32_t>> expect_attr_vector = {
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},

View File

Before

Width:  |  Height:  |  Size: 155 KiB

After

Width:  |  Height:  |  Size: 155 KiB

View File

Before

Width:  |  Height:  |  Size: 169 KiB

After

Width:  |  Height:  |  Size: 169 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 451 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 457 KiB

View File

@ -1,6 +1,6 @@
4
5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes Bald Bangs Big_Lips Big_Nose Black_Hair Blond_Hair Blurry Brown_Hair Bushy_Eyebrows Chubby Double_Chin Eyeglasses Goatee Gray_Hair Heavy_Makeup High_Cheekbones Male Mouth_Slightly_Open Mustache Narrow_Eyes No_Beard Oval_Face Pale_Skin Pointy_Nose Receding_Hairline Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair Wearing_Earrings Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young
1.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
2.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
2.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
1.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
1_apple.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
2_banana.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
3_lemon.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
4_avacado.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1

View File

@ -1,4 +1,4 @@
1.JPEG 0
2.jpeg 1
2.jpeg 2
2.jpeg 0
1_apple.JPEG 0
2_banana.jpg 1
3_lemon.jpg 2
4_avacado.JPEG 0

View File

@ -0,0 +1,16 @@
{
"datasetType": "IMAGENET",
"numRows": 7,
"columns": {
"image": {
"type": "uint8",
"rank": 1,
"t_impl": "cvmat"
},
"label" : {
"type": "uint32",
"rank": 0,
"t_impl" : "flex"
}
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

View File

@ -15,19 +15,74 @@
"""
Test multiple epoch scenarios in debug mode
"""
import math
import numpy as np
import pytest
import mindspore.dataset as ds
from mindspore import log as logger
from util import config_get_set_seed
from util import config_get_set_seed, visualize_list, config_get_set_num_parallel_workers
pytestmark = pytest.mark.forked
@pytest.mark.parametrize("my_debug_mode", (False, True))
def test_pipeline_debug_mode_multi_epoch_celaba(my_debug_mode, plot=False):
    """
    Feature: Pipeline debug mode.
    Description: Test multiple epoch scenario using CelebADataset.
    Expectation: Output is equal to the expected output
    """
    logger.info("test_pipeline_debug_mode_multi_epoch_celaba")
    # Save current configuration and apply the deterministic test configuration
    original_seed = config_get_set_seed(99)
    original_num_workers = config_get_set_num_parallel_workers(1)
    if my_debug_mode:
        debug_mode_original = ds.config.get_debug_mode()
        ds.config.set_debug_mode(True)

    # testCelebAData has 4 samples
    num_samples = 4
    data1 = ds.CelebADataset("../data/dataset/testCelebAData/", decode=True)
    # Confirm dataset size
    assert data1.get_dataset_size() == num_samples

    num_epoch = 2
    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
    epoch_count = 0
    sample_count = 0
    images_seen = []
    for _ in range(num_epoch):
        rows_in_epoch = 0
        for row_item in iter1:
            # Note: Each row in this CelebADataset pipeline has columns "image" and "attr"
            assert len(row_item) == 2
            assert row_item["image"].shape == (2268, 4032, 3)
            images_seen.append(row_item["image"])
            rows_in_epoch += 1
        assert rows_in_epoch == num_samples
        epoch_count += 1
        sample_count += rows_in_epoch
    assert epoch_count == num_epoch
    assert sample_count == num_samples * num_epoch
    if plot:
        visualize_list(images_seen)

    # Restore the saved configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_workers)
    if my_debug_mode:
        ds.config.set_debug_mode(debug_mode_original)
@pytest.mark.parametrize("my_debug_mode", (False, True))
def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
"""
Feature: Pipeline debug mode.
Description: Test creating tuple iterator in CelebA dataset with take op and multi epochs.
Description: Test multiple epoch scenario using CelebADataset with take op.
Expectation: Output is equal to the expected output
"""
logger.info("test_pipeline_debug_mode_multi_epoch_celaba_take")
@ -45,14 +100,15 @@ def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
data1 = data1.take(num_take)
num_epoch = 2
# Use create_tuple_iterator
iter1 = data1.create_tuple_iterator(num_epochs=num_epoch)
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
epoch_count = 0
sample_count = 0
for _ in range(num_epoch):
row_count = 0
for _ in iter1:
# in this example, each row has columns "image" and "label"
for row_item in iter1:
# Note: Each row in this CelebADataset pipeline has columns "image" and "attr"
assert len(row_item) == 2
assert row_item["image"].shape == (2268, 4032, 3)
row_count += 1
assert row_count == num_take
epoch_count += 1
@ -100,7 +156,9 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_take(my_debug_mode):
sample_count = 0
for _ in range(num_epoch):
row_count = 0
for _ in iter1:
for row_item in iter1:
image = row_item["image"]
assert image.shape == (32, 32, 3)
row_count += 1
assert row_count == num_take
epoch_count += 1
@ -130,29 +188,47 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(my_debug_mode):
ds.config.set_debug_mode(True)
data_dir_10 = "../data/dataset/testCifar10Data"
num_samples = 100
num_samples = 40
num_repeat = 2
batch_size = 32
batch_size = 16
data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
# Add repeat then batch
data1 = data1.repeat(num_repeat)
data1 = data1.batch(batch_size, True)
num_epoch = 5
# Use create_tuple_iterator
iter1 = data1.create_tuple_iterator(num_epochs=num_epoch)
num_epoch = 2
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
epoch_count = 0
sample_count = 0
label_list = []
label_golden = [[0, 7, 8, 4, 9, 1, 9, 8, 6, 2, 7, 0, 2, 1, 7, 0],
[1, 4, 4, 7, 9, 4, 7, 8, 3, 4, 3, 9, 4, 7, 3, 9],
[0, 0, 5, 6, 6, 6, 5, 4, 7, 6, 9, 0, 3, 4, 3, 7],
[2, 4, 0, 3, 1, 7, 7, 9, 9, 8, 7, 0, 6, 3, 7, 9],
[0, 8, 7, 8, 1, 6, 4, 2, 6, 4, 9, 5, 3, 2, 5, 9],
[2, 5, 3, 6, 2, 0, 7, 8, 3, 9, 6, 2, 4, 1, 5, 4],
[5, 0, 9, 8, 6, 4, 9, 2, 0, 0, 0, 2, 7, 2, 6, 4],
[6, 2, 7, 7, 2, 6, 9, 2, 8, 5, 7, 6, 6, 4, 6, 2],
[0, 7, 2, 9, 8, 6, 7, 6, 0, 3, 2, 2, 6, 8, 8, 2],
[4, 6, 6, 9, 8, 4, 6, 1, 0, 5, 5, 9, 2, 0, 8, 7]]
for _ in range(num_epoch):
row_count = 0
for _ in iter1:
label_list_per_epoch = []
for row_item in iter1:
image = row_item["image"]
label = row_item["label"]
assert image.shape == (batch_size, 32, 32, 3)
label_list.append(label)
label_list_per_epoch.append(label)
row_count += 1
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
assert row_count == int(num_samples * num_repeat / batch_size)
epoch_count += 1
sample_count += row_count
assert epoch_count == num_epoch
assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
np.testing.assert_array_equal(label_list, np.array(label_golden))
# Restore configuration
ds.config.set_seed(original_seed)
@ -190,8 +266,14 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(my_debug_mode):
sample_count = 0
for _ in range(num_epoch):
row_count = 0
for _ in iter1:
label_list_per_epoch = []
for row_item in iter1:
image = row_item["image"]
label = row_item["label"]
assert image.shape == (batch_size, 32, 32, 3)
label_list_per_epoch.append(label)
row_count += 1
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
assert row_count == int(num_samples * num_repeat / batch_size)
epoch_count += 1
sample_count += row_count
@ -242,8 +324,14 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip(my_debug_mode):
sample_count = 0
for _ in range(num_epoch):
row_count = 0
for _ in iter1:
label_list_per_epoch = []
for row_item in iter1:
image = row_item["image"]
label = row_item["label"]
assert image.shape == (batch_size, 32, 32, 3)
label_list_per_epoch.append(label)
row_count += 1
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
assert row_count == int(num_samples * num_repeat / batch_size)
epoch_count += 1
sample_count += row_count
@ -274,7 +362,7 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
data_dir_10 = "../data/dataset/testCifar10Data"
num_samples = 20
batch_size = 10
num_repeat = 5
num_repeat = 3
data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
@ -292,15 +380,36 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
iter1 = data3.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
epoch_count = 0
sample_count = 0
label_list = []
label_golden = [[5, 1, 3, 6, 2, 7, 5, 2, 1, 9],
[3, 0, 9, 1, 1, 2, 5, 5, 6, 3],
[5, 5, 8, 0, 8, 5, 4, 7, 2, 2],
[2, 2, 4, 8, 1, 1, 3, 0, 5, 8],
[1, 1, 0, 5, 5, 5, 8, 4, 4, 1],
[8, 2, 9, 0, 8, 1, 6, 0, 1, 8],
[7, 0, 6, 1, 6, 2, 7, 4, 2, 3],
[9, 8, 0, 2, 7, 4, 1, 9, 8, 3],
[7, 0, 2, 6, 2, 0, 2, 0, 7, 0],
[4, 7, 7, 7, 6, 5, 3, 4, 5, 9],
[1, 9, 7, 5, 7, 7, 2, 2, 9, 2],
[8, 8, 5, 1, 4, 0, 5, 5, 6, 6]]
for _ in range(num_epoch):
row_count = 0
for _ in iter1:
label_list_per_epoch = []
for row_item in iter1:
image = row_item["image"]
label = row_item["label"]
assert image.shape == (batch_size, 32, 32, 3)
label_list.append(label)
label_list_per_epoch.append(label)
row_count += 1
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
assert row_count == int(num_samples * num_repeat / batch_size)
epoch_count += 1
sample_count += row_count
assert epoch_count == num_epoch
assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
np.testing.assert_array_equal(label_list, np.array(label_golden))
# Restore configuration
ds.config.set_seed(original_seed)
@ -308,53 +417,322 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
ds.config.set_debug_mode(debug_mode_original)
@pytest.mark.parametrize("my_debug_mode, my_drop_remainder, my_num_samples",
[(False, False, 40), (True, False, 40), (True, True, 43)])
def test_pipeline_debug_mode_multi_epoch_imagefolder_batch(my_debug_mode, my_drop_remainder, my_num_samples):
@pytest.mark.parametrize("my_debug_mode", (False, True))
def test_pipeline_debug_mode_multi_epoch_imagefolder(my_debug_mode, plot=False):
"""
Feature: Pipeline debug mode.
Description: Test multiple epoch scenario using ImageFolderDataset with batch op.
Description: Test multiple epoch scenario using ImageFolderDataset. Plot support provided.
Expectation: Output is equal to the expected output
"""
logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_batch")
logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder")
# Set configuration
original_seed = config_get_set_seed(799)
original_seed = config_get_set_seed(899)
original_num_workers = config_get_set_num_parallel_workers(1)
if my_debug_mode:
debug_mode_original = ds.config.get_debug_mode()
ds.config.set_debug_mode(True)
# Note: testImageNetData4 has 7 samples in total
num_samples = 7
# Use all 7 samples from the dataset
data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
decode=True)
# Confirm dataset size
assert data1.get_dataset_size() == num_samples
num_epoch = 4
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
epoch_count = 0
sample_count = 0
image_list = []
label_list = []
label_golden = [4, 3, 5, 0, 1, 2, 6] + [3, 2, 5, 1, 0, 6, 4] + [6, 0, 1, 2, 5, 4, 3] + [3, 4, 5, 1, 0, 6, 2]
for _ in range(num_epoch):
row_count = 0
label_list_per_epoch = []
for row_item in iter1:
image = row_item["image"]
label = row_item["label"]
assert image.shape == (384, 682, 3)
image_list.append(image)
label_list.append(label)
label_list_per_epoch.append(label)
row_count += 1
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
assert row_count == num_samples
epoch_count += 1
sample_count += row_count
assert epoch_count == num_epoch
assert sample_count == num_samples * num_epoch
assert label_list == label_golden
if plot:
visualize_list(image_list)
# Restore configuration
ds.config.set_seed(original_seed)
ds.config.set_num_parallel_workers(original_num_workers)
if my_debug_mode:
ds.config.set_debug_mode(debug_mode_original)
@pytest.mark.parametrize("my_debug_mode, my_shuffle", [(False, True), (True, None), (True, True), (True, False)])
def test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle(my_debug_mode, my_shuffle, plot=False):
    """
    Feature: Pipeline debug mode.
    Description: Test multiple epoch scenario using ImageFolderDataset with shuffle parameter. Plot support provided.
    Expectation: Output is equal to the expected output
    """
    logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle")
    # Save current configuration and apply the deterministic test configuration
    original_seed = config_get_set_seed(899)
    original_num_workers = config_get_set_num_parallel_workers(1)
    if my_debug_mode:
        debug_mode_original = ds.config.get_debug_mode()
        ds.config.set_debug_mode(True)

    num_samples = 5
    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
                                  shuffle=my_shuffle,
                                  num_samples=num_samples,
                                  decode=True)
    num_epoch = 2
    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)

    if my_shuffle is False:
        # Sequential order is used
        label_golden = list(range(0, 5)) * num_epoch
    else:
        # Random order is used, according to the seed value
        label_golden = [2, 1, 3, 0, 4] + [3, 6, 2, 0, 3]

    epoch_count = 0
    sample_count = 0
    images_seen = []
    labels_seen = []
    for _ in range(num_epoch):
        rows_in_epoch = 0
        label_list_per_epoch = []
        for row_item in iter1:
            current_image = row_item["image"]
            current_label = row_item["label"]
            assert current_image.shape == (384, 682, 3)
            images_seen.append(current_image)
            labels_seen.append(current_label)
            label_list_per_epoch.append(current_label)
            rows_in_epoch += 1
        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
        assert rows_in_epoch == num_samples
        epoch_count += 1
        sample_count += rows_in_epoch
    assert epoch_count == num_epoch
    assert sample_count == num_samples * num_epoch
    assert labels_seen == label_golden
    if plot:
        visualize_list(images_seen)

    # Restore the saved configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_workers)
    if my_debug_mode:
        ds.config.set_debug_mode(debug_mode_original)
@pytest.mark.parametrize("my_debug_mode", (False, True))
def test_pipeline_debug_mode_multi_epoch_imagefolder_repeat(my_debug_mode, plot=False):
    """
    Feature: Pipeline debug mode.
    Description: Test multiple epoch scenario using ImageFolderDataset with repeat op. Plot support provided.
    Expectation: Output is equal to the expected output
    """
    logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_repeat")
    # Save current configuration and apply the deterministic test configuration
    original_seed = config_get_set_seed(899)
    original_num_workers = config_get_set_num_parallel_workers(1)
    if my_debug_mode:
        debug_mode_original = ds.config.get_debug_mode()
        ds.config.set_debug_mode(True)

    num_samples = 5
    num_repeat = 3
    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
                                  shuffle=True,
                                  num_samples=num_samples,
                                  decode=True)
    data1 = data1.repeat(num_repeat)
    num_epoch = 2
    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)

    # Random order is used, according to the seed value
    label_golden = [2, 1, 3, 0, 4] + [3, 6, 2, 0, 3] + \
                   [5, 4, 0, 1, 0] + [0, 0, 1, 3, 5] + \
                   [4, 5, 5, 2, 0] + [1, 2, 4, 4, 5]

    epoch_count = 0
    sample_count = 0
    images_seen = []
    labels_seen = []
    for _ in range(num_epoch):
        rows_in_epoch = 0
        label_list_per_epoch = []
        for row_item in iter1:
            current_image = row_item["image"]
            current_label = row_item["label"]
            assert current_image.shape == (384, 682, 3)
            images_seen.append(current_image)
            labels_seen.append(current_label)
            label_list_per_epoch.append(current_label)
            rows_in_epoch += 1
        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
        assert rows_in_epoch == num_samples * num_repeat
        epoch_count += 1
        sample_count += rows_in_epoch
    assert epoch_count == num_epoch
    assert sample_count == num_samples * num_repeat * num_epoch
    assert labels_seen == label_golden
    if plot:
        visualize_list(images_seen)

    # Restore the saved configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_workers)
    if my_debug_mode:
        ds.config.set_debug_mode(debug_mode_original)
@pytest.mark.parametrize("my_debug_mode, my_drop, my_num_samples",
                         [(False, False, 6), (True, False, 6), (True, True, 7)])
def test_pipeline_debug_mode_multi_ep_im_batch_no_remainder(my_debug_mode, my_drop, my_num_samples, plot=False):
    """
    Feature: Pipeline debug mode.
    Description: Test multiple epoch scenario using ImageFolderDataset with batch op and no remainder.
    Expectation: Output is equal to the expected output
    """
    # Fix: removed stale leftover lines from the pre-rename version of this test
    # (`batch_size = 5`, the testPK dataset construction, and a reference to the
    # old parameter name `my_drop_remainder`, which no longer exists and raised
    # NameError). Only the current testImageNetData4 pipeline remains.
    logger.info("test_pipeline_debug_mode_multi_ep_im_batch_no_remainder")
    # Set configuration
    original_seed = config_get_set_seed(899)
    original_num_workers = config_get_set_num_parallel_workers(1)
    if my_debug_mode:
        debug_mode_original = ds.config.get_debug_mode()
        ds.config.set_debug_mode(True)

    num_samples = my_num_samples
    batch_size = 2
    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
                                  num_samples=num_samples,
                                  decode=True)
    data1 = data1.batch(batch_size, drop_remainder=my_drop)
    num_epoch = 3
    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
    epoch_count = 0
    sample_count = 0
    image_list = []
    label_list = []
    # Expected labels per batch for all 3 epochs, per the fixed seed
    label_golden = [[2, 1], [3, 0], [4, 6]] + [[3, 6], [2, 0], [3, 5]] + [[5, 4], [0, 1], [0, 6]]
    for _ in range(num_epoch):
        row_count = 0
        label_list_per_epoch = []
        for row_item in iter1:
            image = row_item["image"]
            label = row_item["label"]
            # Every batch is full: with drop_remainder any partial batch is dropped
            assert image.shape == (2, 384, 682, 3)
            image_list.append(image[0])
            label_list.append(label)
            label_list_per_epoch.append(label)
            row_count += 1
        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
        assert row_count == int(num_samples / batch_size)
        epoch_count += 1
        sample_count += row_count
    assert epoch_count == num_epoch
    assert sample_count == int(num_samples / batch_size) * num_epoch
    np.testing.assert_array_equal(label_list, np.array(label_golden))
    if plot:
        visualize_list(image_list)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_workers)
    if my_debug_mode:
        ds.config.set_debug_mode(debug_mode_original)
@pytest.mark.parametrize("my_debug_mode, my_drop, my_num_samples",
                         [(False, False, 7), (True, False, 7)])
def test_pipeline_debug_mode_multi_ep_im_batch_with_remainders(my_debug_mode, my_drop, my_num_samples, plot=False):
    """
    Feature: Pipeline debug mode.
    Description: Test multiple epoch scenario using ImageFolderDataset with batch op and remainder.
    Expectation: Output is equal to the expected output
    """
    logger.info("test_pipeline_debug_mode_multi_ep_im_batch_with_remainders")
    # Set configuration
    original_seed = config_get_set_seed(899)
    original_num_workers = config_get_set_num_parallel_workers(1)
    if my_debug_mode:
        debug_mode_original = ds.config.get_debug_mode()
        ds.config.set_debug_mode(True)

    num_samples = my_num_samples
    batch_size = 2
    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
                                  num_samples=num_samples,
                                  decode=True)
    data1 = data1.batch(batch_size, drop_remainder=my_drop)
    num_epoch = 3
    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
    epoch_count = 0
    sample_count = 0
    image_list = []
    label_list = []
    # Expected labels per batch for all 3 epochs (last batch of each epoch is the
    # 1-sample remainder), per the fixed seed
    label_golden = [[2, 1], [3, 0], [4, 6], [5]] + [[3, 6], [2, 0], [3, 5], [1]] + [[5, 4], [0, 1], [0, 6], [6]]
    for _ in range(num_epoch):
        row_count = 0
        label_list_per_epoch = []
        for row_item in iter1:
            image = row_item["image"]
            label = row_item["label"]
            # Bug fix: the original `assert image.shape == (2, 384, 682, 3) or (1, 384, 682, 3)`
            # was always true (a non-empty tuple is truthy), so the shape was never
            # actually checked. Use a membership test: full batches have 2 images,
            # the remainder batch has 1.
            assert image.shape in ((2, 384, 682, 3), (1, 384, 682, 3))
            image_list.append(image[0])
            label_list.append(list(label))
            label_list_per_epoch.append(list(label))
            row_count += 1
        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
        assert row_count == math.ceil(num_samples / batch_size)
        epoch_count += 1
        sample_count += row_count
    assert epoch_count == num_epoch
    assert sample_count == math.ceil(num_samples / batch_size) * num_epoch
    assert label_list == label_golden
    if plot:
        visualize_list(image_list)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_workers)
    if my_debug_mode:
        ds.config.set_debug_mode(debug_mode_original)
if __name__ == '__main__':
    # Fix 1: removed the stale call to test_pipeline_debug_mode_multi_epoch_imagefolder_batch,
    # which no longer exists (it was split/renamed into the imagefolder,
    # imagefolder_shuffle, imagefolder_repeat, and im_batch_* tests).
    # Fix 2: the last call used the wrong name `..._with_remainder` (singular);
    # the function is defined as `..._with_remainders`, so the call raised NameError.
    test_pipeline_debug_mode_multi_epoch_celaba(True, plot=True)
    test_pipeline_debug_mode_multi_epoch_celaba_take(True)
    test_pipeline_debug_mode_multi_epoch_cifar10_take(True)
    test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(True)
    test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(True)
    test_pipeline_debug_mode_multi_epoch_cifar10_zip(True)
    test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(True)
    test_pipeline_debug_mode_multi_epoch_imagefolder(True, plot=True)
    test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle(True, True, plot=True)
    test_pipeline_debug_mode_multi_epoch_imagefolder_repeat(True, plot=True)
    test_pipeline_debug_mode_multi_ep_im_batch_no_remainder(True, True, 7, plot=True)
    test_pipeline_debug_mode_multi_ep_im_batch_with_remainders(True, False, 7, plot=True)