[MD] Debug Mode - Mappable Op Row Content Order Fix and UTs
|
@ -183,15 +183,16 @@ Status MappableLeafOp::GetNextRowPullMode(TensorRow *const row) {
|
||||||
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
|
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
|
||||||
CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
|
CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
|
||||||
sample_ids_ = sample_row[0];
|
sample_ids_ = sample_row[0];
|
||||||
|
MS_LOG(DEBUG) << "Set sample_ids_=" << (*sample_ids_);
|
||||||
}
|
}
|
||||||
if (curr_row_ + 1 > sample_ids_->Size()) {
|
if (curr_row_ + 1 > sample_ids_->Size()) {
|
||||||
*row = TensorRow(TensorRow::kFlagEOE);
|
*row = TensorRow(TensorRow::kFlagEOE);
|
||||||
RETURN_IF_NOT_OK(ResetAndUpdateRepeat());
|
RETURN_IF_NOT_OK(ResetAndUpdateRepeat());
|
||||||
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
int64_t key;
|
int64_t key;
|
||||||
RETURN_IF_NOT_OK(sample_ids_->GetItemAt(&key, {curr_row_}));
|
RETURN_IF_NOT_OK(sample_ids_->GetItemAt(&key, {curr_row_}));
|
||||||
|
MS_LOG(DEBUG) << "Got key=" << key << " with curr_row_=" << curr_row_;
|
||||||
RETURN_IF_NOT_OK(LoadTensorRowPullMode(key, row));
|
RETURN_IF_NOT_OK(LoadTensorRowPullMode(key, row));
|
||||||
curr_row_++;
|
curr_row_++;
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
|
@ -200,6 +201,12 @@ Status MappableLeafOp::GetNextRowPullMode(TensorRow *const row) {
|
||||||
Status MappableLeafOp::ResetAndUpdateRepeat() {
|
Status MappableLeafOp::ResetAndUpdateRepeat() {
|
||||||
if (!IsLastIteration()) {
|
if (!IsLastIteration()) {
|
||||||
RETURN_IF_NOT_OK(Reset());
|
RETURN_IF_NOT_OK(Reset());
|
||||||
|
TensorRow sample_row;
|
||||||
|
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
|
||||||
|
CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
|
||||||
|
// Get sample_ids
|
||||||
|
sample_ids_ = sample_row[0];
|
||||||
|
MS_LOG(DEBUG) << "Set sample_ids_=" << (*sample_ids_);
|
||||||
UpdateRepeatAndEpochCounter();
|
UpdateRepeatAndEpochCounter();
|
||||||
} else {
|
} else {
|
||||||
eof_handled_ = true;
|
eof_handled_ = true;
|
||||||
|
|
|
@ -48,7 +48,7 @@ TEST_F(MindDataTestPipeline, TestCelebADataset) {
|
||||||
ASSERT_OK(iter->GetNextRow(&row));
|
ASSERT_OK(iter->GetNextRow(&row));
|
||||||
|
|
||||||
// Check if CelebA() read correct images/attr
|
// Check if CelebA() read correct images/attr
|
||||||
std::string expect_file[] = {"1.JPEG", "2.jpg"};
|
std::string expect_file[] = {"1_apple.JPEG", "2_banana.jpg"};
|
||||||
std::vector<std::vector<uint32_t>> expect_attr_vector = {
|
std::vector<std::vector<uint32_t>> expect_attr_vector = {
|
||||||
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
|
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
|
||||||
|
|
Before Width: | Height: | Size: 155 KiB After Width: | Height: | Size: 155 KiB |
Before Width: | Height: | Size: 169 KiB After Width: | Height: | Size: 169 KiB |
After Width: | Height: | Size: 451 KiB |
After Width: | Height: | Size: 457 KiB |
|
@ -1,6 +1,6 @@
|
||||||
4
|
4
|
||||||
5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes Bald Bangs Big_Lips Big_Nose Black_Hair Blond_Hair Blurry Brown_Hair Bushy_Eyebrows Chubby Double_Chin Eyeglasses Goatee Gray_Hair Heavy_Makeup High_Cheekbones Male Mouth_Slightly_Open Mustache Narrow_Eyes No_Beard Oval_Face Pale_Skin Pointy_Nose Receding_Hairline Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair Wearing_Earrings Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young
|
5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes Bald Bangs Big_Lips Big_Nose Black_Hair Blond_Hair Blurry Brown_Hair Bushy_Eyebrows Chubby Double_Chin Eyeglasses Goatee Gray_Hair Heavy_Makeup High_Cheekbones Male Mouth_Slightly_Open Mustache Narrow_Eyes No_Beard Oval_Face Pale_Skin Pointy_Nose Receding_Hairline Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair Wearing_Earrings Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young
|
||||||
1.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
|
1_apple.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
|
||||||
2.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
|
2_banana.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
|
||||||
2.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
|
3_lemon.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
|
||||||
1.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
|
4_avacado.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
1.JPEG 0
|
1_apple.JPEG 0
|
||||||
2.jpeg 1
|
2_banana.jpg 1
|
||||||
2.jpeg 2
|
3_lemon.jpg 2
|
||||||
2.jpeg 0
|
4_avacado.JPEG 0
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
"datasetType": "IMAGENET",
|
||||||
|
"numRows": 7,
|
||||||
|
"columns": {
|
||||||
|
"image": {
|
||||||
|
"type": "uint8",
|
||||||
|
"rank": 1,
|
||||||
|
"t_impl": "cvmat"
|
||||||
|
},
|
||||||
|
"label" : {
|
||||||
|
"type": "uint32",
|
||||||
|
"rank": 0,
|
||||||
|
"t_impl" : "flex"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
After Width: | Height: | Size: 18 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 27 KiB |
After Width: | Height: | Size: 16 KiB |
After Width: | Height: | Size: 9.9 KiB |
|
@ -15,19 +15,74 @@
|
||||||
"""
|
"""
|
||||||
Test multiple epoch scenarios in debug mode
|
Test multiple epoch scenarios in debug mode
|
||||||
"""
|
"""
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
import mindspore.dataset as ds
|
import mindspore.dataset as ds
|
||||||
from mindspore import log as logger
|
from mindspore import log as logger
|
||||||
from util import config_get_set_seed
|
from util import config_get_set_seed, visualize_list, config_get_set_num_parallel_workers
|
||||||
|
|
||||||
pytestmark = pytest.mark.forked
|
pytestmark = pytest.mark.forked
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("my_debug_mode", (False, True))
|
||||||
|
def test_pipeline_debug_mode_multi_epoch_celaba(my_debug_mode, plot=False):
|
||||||
|
"""
|
||||||
|
Feature: Pipeline debug mode.
|
||||||
|
Description: Test multiple epoch scenario using CelebADataset.
|
||||||
|
Expectation: Output is equal to the expected output
|
||||||
|
"""
|
||||||
|
logger.info("test_pipeline_debug_mode_multi_epoch_celaba")
|
||||||
|
|
||||||
|
# Set configuration
|
||||||
|
original_seed = config_get_set_seed(99)
|
||||||
|
original_num_workers = config_get_set_num_parallel_workers(1)
|
||||||
|
if my_debug_mode:
|
||||||
|
debug_mode_original = ds.config.get_debug_mode()
|
||||||
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
|
# testCelebAData has 4 samples
|
||||||
|
num_samples = 4
|
||||||
|
|
||||||
|
data1 = ds.CelebADataset("../data/dataset/testCelebAData/", decode=True)
|
||||||
|
|
||||||
|
# Confirm dataset size
|
||||||
|
assert data1.get_dataset_size() == num_samples
|
||||||
|
|
||||||
|
num_epoch = 2
|
||||||
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
|
epoch_count = 0
|
||||||
|
sample_count = 0
|
||||||
|
image_list = []
|
||||||
|
for _ in range(num_epoch):
|
||||||
|
row_count = 0
|
||||||
|
for row_item in iter1:
|
||||||
|
# Note: Each row in this CelebADataset pipeline has columns "image" and "attr"
|
||||||
|
assert len(row_item) == 2
|
||||||
|
assert row_item["image"].shape == (2268, 4032, 3)
|
||||||
|
image = row_item["image"]
|
||||||
|
image_list.append(image)
|
||||||
|
row_count += 1
|
||||||
|
assert row_count == num_samples
|
||||||
|
epoch_count += 1
|
||||||
|
sample_count += row_count
|
||||||
|
assert epoch_count == num_epoch
|
||||||
|
assert sample_count == num_samples * num_epoch
|
||||||
|
if plot:
|
||||||
|
visualize_list(image_list)
|
||||||
|
|
||||||
|
# Restore configuration
|
||||||
|
ds.config.set_seed(original_seed)
|
||||||
|
ds.config.set_num_parallel_workers(original_num_workers)
|
||||||
|
if my_debug_mode:
|
||||||
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("my_debug_mode", (False, True))
|
@pytest.mark.parametrize("my_debug_mode", (False, True))
|
||||||
def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
|
def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
|
||||||
"""
|
"""
|
||||||
Feature: Pipeline debug mode.
|
Feature: Pipeline debug mode.
|
||||||
Description: Test creating tuple iterator in CelebA dataset with take op and multi epochs.
|
Description: Test multiple epoch scenario using CelebADataset with take op.
|
||||||
Expectation: Output is equal to the expected output
|
Expectation: Output is equal to the expected output
|
||||||
"""
|
"""
|
||||||
logger.info("test_pipeline_debug_mode_multi_epoch_celaba_take")
|
logger.info("test_pipeline_debug_mode_multi_epoch_celaba_take")
|
||||||
|
@ -45,14 +100,15 @@ def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
|
||||||
data1 = data1.take(num_take)
|
data1 = data1.take(num_take)
|
||||||
|
|
||||||
num_epoch = 2
|
num_epoch = 2
|
||||||
# Use create_tuple_iterator
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
iter1 = data1.create_tuple_iterator(num_epochs=num_epoch)
|
|
||||||
epoch_count = 0
|
epoch_count = 0
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
for row_item in iter1:
|
||||||
# in this example, each row has columns "image" and "label"
|
# Note: Each row in this CelebADataset pipeline has columns "image" and "attr"
|
||||||
|
assert len(row_item) == 2
|
||||||
|
assert row_item["image"].shape == (2268, 4032, 3)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
assert row_count == num_take
|
assert row_count == num_take
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
|
@ -100,7 +156,9 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_take(my_debug_mode):
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
assert image.shape == (32, 32, 3)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
assert row_count == num_take
|
assert row_count == num_take
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
|
@ -130,29 +188,47 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(my_debug_mode):
|
||||||
ds.config.set_debug_mode(True)
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
data_dir_10 = "../data/dataset/testCifar10Data"
|
data_dir_10 = "../data/dataset/testCifar10Data"
|
||||||
num_samples = 100
|
num_samples = 40
|
||||||
num_repeat = 2
|
num_repeat = 2
|
||||||
batch_size = 32
|
batch_size = 16
|
||||||
|
|
||||||
data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
|
data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
|
||||||
# Add repeat then batch
|
# Add repeat then batch
|
||||||
data1 = data1.repeat(num_repeat)
|
data1 = data1.repeat(num_repeat)
|
||||||
data1 = data1.batch(batch_size, True)
|
data1 = data1.batch(batch_size, True)
|
||||||
|
|
||||||
num_epoch = 5
|
num_epoch = 2
|
||||||
# Use create_tuple_iterator
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
iter1 = data1.create_tuple_iterator(num_epochs=num_epoch)
|
|
||||||
epoch_count = 0
|
epoch_count = 0
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
|
label_list = []
|
||||||
|
label_golden = [[0, 7, 8, 4, 9, 1, 9, 8, 6, 2, 7, 0, 2, 1, 7, 0],
|
||||||
|
[1, 4, 4, 7, 9, 4, 7, 8, 3, 4, 3, 9, 4, 7, 3, 9],
|
||||||
|
[0, 0, 5, 6, 6, 6, 5, 4, 7, 6, 9, 0, 3, 4, 3, 7],
|
||||||
|
[2, 4, 0, 3, 1, 7, 7, 9, 9, 8, 7, 0, 6, 3, 7, 9],
|
||||||
|
[0, 8, 7, 8, 1, 6, 4, 2, 6, 4, 9, 5, 3, 2, 5, 9],
|
||||||
|
[2, 5, 3, 6, 2, 0, 7, 8, 3, 9, 6, 2, 4, 1, 5, 4],
|
||||||
|
[5, 0, 9, 8, 6, 4, 9, 2, 0, 0, 0, 2, 7, 2, 6, 4],
|
||||||
|
[6, 2, 7, 7, 2, 6, 9, 2, 8, 5, 7, 6, 6, 4, 6, 2],
|
||||||
|
[0, 7, 2, 9, 8, 6, 7, 6, 0, 3, 2, 2, 6, 8, 8, 2],
|
||||||
|
[4, 6, 6, 9, 8, 4, 6, 1, 0, 5, 5, 9, 2, 0, 8, 7]]
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (batch_size, 32, 32, 3)
|
||||||
|
label_list.append(label)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
assert row_count == int(num_samples * num_repeat / batch_size)
|
assert row_count == int(num_samples * num_repeat / batch_size)
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
sample_count += row_count
|
sample_count += row_count
|
||||||
assert epoch_count == num_epoch
|
assert epoch_count == num_epoch
|
||||||
assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
|
assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
|
||||||
|
np.testing.assert_array_equal(label_list, np.array(label_golden))
|
||||||
|
|
||||||
# Restore configuration
|
# Restore configuration
|
||||||
ds.config.set_seed(original_seed)
|
ds.config.set_seed(original_seed)
|
||||||
|
@ -190,8 +266,14 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(my_debug_mode):
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (batch_size, 32, 32, 3)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
assert row_count == int(num_samples * num_repeat / batch_size)
|
assert row_count == int(num_samples * num_repeat / batch_size)
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
sample_count += row_count
|
sample_count += row_count
|
||||||
|
@ -242,8 +324,14 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip(my_debug_mode):
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (batch_size, 32, 32, 3)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
assert row_count == int(num_samples * num_repeat / batch_size)
|
assert row_count == int(num_samples * num_repeat / batch_size)
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
sample_count += row_count
|
sample_count += row_count
|
||||||
|
@ -274,7 +362,7 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
|
||||||
data_dir_10 = "../data/dataset/testCifar10Data"
|
data_dir_10 = "../data/dataset/testCifar10Data"
|
||||||
num_samples = 20
|
num_samples = 20
|
||||||
batch_size = 10
|
batch_size = 10
|
||||||
num_repeat = 5
|
num_repeat = 3
|
||||||
|
|
||||||
data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
|
data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
|
||||||
|
|
||||||
|
@ -292,15 +380,36 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
|
||||||
iter1 = data3.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
iter1 = data3.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
epoch_count = 0
|
epoch_count = 0
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
|
label_list = []
|
||||||
|
label_golden = [[5, 1, 3, 6, 2, 7, 5, 2, 1, 9],
|
||||||
|
[3, 0, 9, 1, 1, 2, 5, 5, 6, 3],
|
||||||
|
[5, 5, 8, 0, 8, 5, 4, 7, 2, 2],
|
||||||
|
[2, 2, 4, 8, 1, 1, 3, 0, 5, 8],
|
||||||
|
[1, 1, 0, 5, 5, 5, 8, 4, 4, 1],
|
||||||
|
[8, 2, 9, 0, 8, 1, 6, 0, 1, 8],
|
||||||
|
[7, 0, 6, 1, 6, 2, 7, 4, 2, 3],
|
||||||
|
[9, 8, 0, 2, 7, 4, 1, 9, 8, 3],
|
||||||
|
[7, 0, 2, 6, 2, 0, 2, 0, 7, 0],
|
||||||
|
[4, 7, 7, 7, 6, 5, 3, 4, 5, 9],
|
||||||
|
[1, 9, 7, 5, 7, 7, 2, 2, 9, 2],
|
||||||
|
[8, 8, 5, 1, 4, 0, 5, 5, 6, 6]]
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (batch_size, 32, 32, 3)
|
||||||
|
label_list.append(label)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
assert row_count == int(num_samples * num_repeat / batch_size)
|
assert row_count == int(num_samples * num_repeat / batch_size)
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
sample_count += row_count
|
sample_count += row_count
|
||||||
assert epoch_count == num_epoch
|
assert epoch_count == num_epoch
|
||||||
assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
|
assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
|
||||||
|
np.testing.assert_array_equal(label_list, np.array(label_golden))
|
||||||
|
|
||||||
# Restore configuration
|
# Restore configuration
|
||||||
ds.config.set_seed(original_seed)
|
ds.config.set_seed(original_seed)
|
||||||
|
@ -308,53 +417,322 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
|
||||||
ds.config.set_debug_mode(debug_mode_original)
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("my_debug_mode, my_drop_remainder, my_num_samples",
|
@pytest.mark.parametrize("my_debug_mode", (False, True))
|
||||||
[(False, False, 40), (True, False, 40), (True, True, 43)])
|
def test_pipeline_debug_mode_multi_epoch_imagefolder(my_debug_mode, plot=False):
|
||||||
def test_pipeline_debug_mode_multi_epoch_imagefolder_batch(my_debug_mode, my_drop_remainder, my_num_samples):
|
|
||||||
"""
|
"""
|
||||||
Feature: Pipeline debug mode.
|
Feature: Pipeline debug mode.
|
||||||
Description: Test multiple epoch scenario using ImageFolderDataset with batch op.
|
Description: Test multiple epoch scenario using ImageFolderDataset. Plot support provided.
|
||||||
Expectation: Output is equal to the expected output
|
Expectation: Output is equal to the expected output
|
||||||
"""
|
"""
|
||||||
logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_batch")
|
logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder")
|
||||||
|
|
||||||
# Set configuration
|
# Set configuration
|
||||||
original_seed = config_get_set_seed(799)
|
original_seed = config_get_set_seed(899)
|
||||||
|
original_num_workers = config_get_set_num_parallel_workers(1)
|
||||||
|
if my_debug_mode:
|
||||||
|
debug_mode_original = ds.config.get_debug_mode()
|
||||||
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
|
# Note: testImageNetData4 has 7 samples in total
|
||||||
|
num_samples = 7
|
||||||
|
|
||||||
|
# Use all 7 samples from the dataset
|
||||||
|
data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
|
||||||
|
decode=True)
|
||||||
|
# Confirm dataset size
|
||||||
|
assert data1.get_dataset_size() == num_samples
|
||||||
|
|
||||||
|
num_epoch = 4
|
||||||
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
|
epoch_count = 0
|
||||||
|
sample_count = 0
|
||||||
|
image_list = []
|
||||||
|
label_list = []
|
||||||
|
label_golden = [4, 3, 5, 0, 1, 2, 6] + [3, 2, 5, 1, 0, 6, 4] + [6, 0, 1, 2, 5, 4, 3] + [3, 4, 5, 1, 0, 6, 2]
|
||||||
|
for _ in range(num_epoch):
|
||||||
|
row_count = 0
|
||||||
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (384, 682, 3)
|
||||||
|
image_list.append(image)
|
||||||
|
label_list.append(label)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
|
assert row_count == num_samples
|
||||||
|
epoch_count += 1
|
||||||
|
sample_count += row_count
|
||||||
|
assert epoch_count == num_epoch
|
||||||
|
assert sample_count == num_samples * num_epoch
|
||||||
|
assert label_list == label_golden
|
||||||
|
if plot:
|
||||||
|
visualize_list(image_list)
|
||||||
|
|
||||||
|
# Restore configuration
|
||||||
|
ds.config.set_seed(original_seed)
|
||||||
|
ds.config.set_num_parallel_workers(original_num_workers)
|
||||||
|
if my_debug_mode:
|
||||||
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("my_debug_mode, my_shuffle", [(False, True), (True, None), (True, True), (True, False)])
|
||||||
|
def test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle(my_debug_mode, my_shuffle, plot=False):
|
||||||
|
"""
|
||||||
|
Feature: Pipeline debug mode.
|
||||||
|
Description: Test multiple epoch scenario using ImageFolderDataset with shuffle parameter. Plot support provided.
|
||||||
|
Expectation: Output is equal to the expected output
|
||||||
|
"""
|
||||||
|
logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle")
|
||||||
|
|
||||||
|
# Set configuration
|
||||||
|
original_seed = config_get_set_seed(899)
|
||||||
|
original_num_workers = config_get_set_num_parallel_workers(1)
|
||||||
|
if my_debug_mode:
|
||||||
|
debug_mode_original = ds.config.get_debug_mode()
|
||||||
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
|
num_samples = 5
|
||||||
|
data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
|
||||||
|
shuffle=my_shuffle,
|
||||||
|
num_samples=num_samples,
|
||||||
|
decode=True)
|
||||||
|
|
||||||
|
num_epoch = 2
|
||||||
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
|
epoch_count = 0
|
||||||
|
sample_count = 0
|
||||||
|
image_list = []
|
||||||
|
label_list = []
|
||||||
|
if my_shuffle is False:
|
||||||
|
# Sequential order is used
|
||||||
|
label_golden = list(range(0, 5)) * num_epoch
|
||||||
|
else:
|
||||||
|
# Random order is used, according to the seed value
|
||||||
|
label_golden = [2, 1, 3, 0, 4] + [3, 6, 2, 0, 3]
|
||||||
|
for _ in range(num_epoch):
|
||||||
|
row_count = 0
|
||||||
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (384, 682, 3)
|
||||||
|
image_list.append(image)
|
||||||
|
label_list.append(label)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
|
assert row_count == num_samples
|
||||||
|
epoch_count += 1
|
||||||
|
sample_count += row_count
|
||||||
|
assert epoch_count == num_epoch
|
||||||
|
assert sample_count == num_samples * num_epoch
|
||||||
|
assert label_list == label_golden
|
||||||
|
if plot:
|
||||||
|
visualize_list(image_list)
|
||||||
|
|
||||||
|
# Restore configuration
|
||||||
|
ds.config.set_seed(original_seed)
|
||||||
|
ds.config.set_num_parallel_workers(original_num_workers)
|
||||||
|
if my_debug_mode:
|
||||||
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("my_debug_mode", (False, True))
|
||||||
|
def test_pipeline_debug_mode_multi_epoch_imagefolder_repeat(my_debug_mode, plot=False):
|
||||||
|
"""
|
||||||
|
Feature: Pipeline debug mode.
|
||||||
|
Description: Test multiple epoch scenario using ImageFolderDataset with repeat op. Plot support provided.
|
||||||
|
Expectation: Output is equal to the expected output
|
||||||
|
"""
|
||||||
|
logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_repeat")
|
||||||
|
|
||||||
|
# Set configuration
|
||||||
|
original_seed = config_get_set_seed(899)
|
||||||
|
original_num_workers = config_get_set_num_parallel_workers(1)
|
||||||
|
if my_debug_mode:
|
||||||
|
debug_mode_original = ds.config.get_debug_mode()
|
||||||
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
|
num_samples = 5
|
||||||
|
num_repeat = 3
|
||||||
|
data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
|
||||||
|
shuffle=True,
|
||||||
|
num_samples=num_samples,
|
||||||
|
decode=True)
|
||||||
|
data1 = data1.repeat(num_repeat)
|
||||||
|
|
||||||
|
num_epoch = 2
|
||||||
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
|
epoch_count = 0
|
||||||
|
sample_count = 0
|
||||||
|
image_list = []
|
||||||
|
label_list = []
|
||||||
|
# Random order is used, according to the seed value
|
||||||
|
label_golden = [2, 1, 3, 0, 4] + [3, 6, 2, 0, 3] + \
|
||||||
|
[5, 4, 0, 1, 0] + [0, 0, 1, 3, 5] + \
|
||||||
|
[4, 5, 5, 2, 0] + [1, 2, 4, 4, 5]
|
||||||
|
for _ in range(num_epoch):
|
||||||
|
row_count = 0
|
||||||
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (384, 682, 3)
|
||||||
|
image_list.append(image)
|
||||||
|
label_list.append(label)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
|
assert row_count == num_samples * num_repeat
|
||||||
|
epoch_count += 1
|
||||||
|
sample_count += row_count
|
||||||
|
assert epoch_count == num_epoch
|
||||||
|
assert sample_count == num_samples * num_repeat * num_epoch
|
||||||
|
assert label_list == label_golden
|
||||||
|
|
||||||
|
if plot:
|
||||||
|
visualize_list(image_list)
|
||||||
|
|
||||||
|
# Restore configuration
|
||||||
|
ds.config.set_seed(original_seed)
|
||||||
|
ds.config.set_num_parallel_workers(original_num_workers)
|
||||||
|
if my_debug_mode:
|
||||||
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("my_debug_mode, my_drop, my_num_samples",
|
||||||
|
[(False, False, 6), (True, False, 6), (True, True, 7)])
|
||||||
|
def test_pipeline_debug_mode_multi_ep_im_batch_no_remainder(my_debug_mode, my_drop, my_num_samples, plot=False):
|
||||||
|
"""
|
||||||
|
Feature: Pipeline debug mode.
|
||||||
|
Description: Test multiple epoch scenario using ImageFolderDataset with batch op and no remainder.
|
||||||
|
Expectation: Output is equal to the expected output
|
||||||
|
"""
|
||||||
|
logger.info("test_pipeline_debug_mode_multi_ep_im_batch_no_remainder")
|
||||||
|
|
||||||
|
# Set configuration
|
||||||
|
original_seed = config_get_set_seed(899)
|
||||||
|
original_num_workers = config_get_set_num_parallel_workers(1)
|
||||||
if my_debug_mode:
|
if my_debug_mode:
|
||||||
debug_mode_original = ds.config.get_debug_mode()
|
debug_mode_original = ds.config.get_debug_mode()
|
||||||
ds.config.set_debug_mode(True)
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
num_samples = my_num_samples
|
num_samples = my_num_samples
|
||||||
batch_size = 5
|
batch_size = 2
|
||||||
|
|
||||||
data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", num_samples=num_samples)
|
data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
|
||||||
data1 = data1.batch(batch_size, drop_remainder=my_drop_remainder)
|
num_samples=num_samples,
|
||||||
|
decode=True)
|
||||||
|
data1 = data1.batch(batch_size, drop_remainder=my_drop)
|
||||||
|
|
||||||
num_epoch = 3
|
num_epoch = 3
|
||||||
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
epoch_count = 0
|
epoch_count = 0
|
||||||
sample_count = 0
|
sample_count = 0
|
||||||
|
image_list = []
|
||||||
|
label_list = []
|
||||||
|
label_golden = [[2, 1], [3, 0], [4, 6]] + [[3, 6], [2, 0], [3, 5]] + [[5, 4], [0, 1], [0, 6]]
|
||||||
for _ in range(num_epoch):
|
for _ in range(num_epoch):
|
||||||
row_count = 0
|
row_count = 0
|
||||||
for _ in iter1:
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape == (2, 384, 682, 3)
|
||||||
|
image_list.append(image[0])
|
||||||
|
label_list.append(label)
|
||||||
|
label_list_per_epoch.append(label)
|
||||||
row_count += 1
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
assert row_count == int(num_samples / batch_size)
|
assert row_count == int(num_samples / batch_size)
|
||||||
epoch_count += 1
|
epoch_count += 1
|
||||||
sample_count += row_count
|
sample_count += row_count
|
||||||
|
|
||||||
assert epoch_count == num_epoch
|
assert epoch_count == num_epoch
|
||||||
assert sample_count == int(num_samples / batch_size) * num_epoch
|
assert sample_count == int(num_samples / batch_size) * num_epoch
|
||||||
|
np.testing.assert_array_equal(label_list, np.array(label_golden))
|
||||||
|
if plot:
|
||||||
|
visualize_list(image_list)
|
||||||
|
|
||||||
# Restore configuration
|
# Restore configuration
|
||||||
ds.config.set_seed(original_seed)
|
ds.config.set_seed(original_seed)
|
||||||
|
ds.config.set_num_parallel_workers(original_num_workers)
|
||||||
|
if my_debug_mode:
|
||||||
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("my_debug_mode, my_drop, my_num_samples",
|
||||||
|
[(False, False, 7), (True, False, 7)])
|
||||||
|
def test_pipeline_debug_mode_multi_ep_im_batch_with_remainders(my_debug_mode, my_drop, my_num_samples, plot=False):
|
||||||
|
"""
|
||||||
|
Feature: Pipeline debug mode.
|
||||||
|
Description: Test multiple epoch scenario using ImageFolderDataset with batch op and remainder.
|
||||||
|
Expectation: Output is equal to the expected output
|
||||||
|
"""
|
||||||
|
logger.info("test_pipeline_debug_mode_multi_ep_im_batch_with_remainders")
|
||||||
|
|
||||||
|
# Set configuration
|
||||||
|
original_seed = config_get_set_seed(899)
|
||||||
|
original_num_workers = config_get_set_num_parallel_workers(1)
|
||||||
|
if my_debug_mode:
|
||||||
|
debug_mode_original = ds.config.get_debug_mode()
|
||||||
|
ds.config.set_debug_mode(True)
|
||||||
|
|
||||||
|
num_samples = my_num_samples
|
||||||
|
batch_size = 2
|
||||||
|
|
||||||
|
data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
|
||||||
|
num_samples=num_samples,
|
||||||
|
decode=True)
|
||||||
|
data1 = data1.batch(batch_size, drop_remainder=my_drop)
|
||||||
|
|
||||||
|
num_epoch = 3
|
||||||
|
iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
|
||||||
|
epoch_count = 0
|
||||||
|
sample_count = 0
|
||||||
|
image_list = []
|
||||||
|
label_list = []
|
||||||
|
label_golden = [[2, 1], [3, 0], [4, 6], [5]] + [[3, 6], [2, 0], [3, 5], [1]] + [[5, 4], [0, 1], [0, 6], [6]]
|
||||||
|
for _ in range(num_epoch):
|
||||||
|
row_count = 0
|
||||||
|
label_list_per_epoch = []
|
||||||
|
for row_item in iter1:
|
||||||
|
image = row_item["image"]
|
||||||
|
label = row_item["label"]
|
||||||
|
assert image.shape in ((2, 384, 682, 3), (1, 384, 682, 3))
|
||||||
|
image_list.append(image[0])
|
||||||
|
label_list.append(list(label))
|
||||||
|
label_list_per_epoch.append(list(label))
|
||||||
|
row_count += 1
|
||||||
|
logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
|
||||||
|
assert row_count == math.ceil(num_samples / batch_size)
|
||||||
|
epoch_count += 1
|
||||||
|
sample_count += row_count
|
||||||
|
assert epoch_count == num_epoch
|
||||||
|
assert sample_count == math.ceil(num_samples / batch_size) * num_epoch
|
||||||
|
assert label_list == label_golden
|
||||||
|
if plot:
|
||||||
|
visualize_list(image_list)
|
||||||
|
|
||||||
|
# Restore configuration
|
||||||
|
ds.config.set_seed(original_seed)
|
||||||
|
ds.config.set_num_parallel_workers(original_num_workers)
|
||||||
if my_debug_mode:
|
if my_debug_mode:
|
||||||
ds.config.set_debug_mode(debug_mode_original)
|
ds.config.set_debug_mode(debug_mode_original)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
test_pipeline_debug_mode_multi_epoch_celaba(True, plot=True)
|
||||||
test_pipeline_debug_mode_multi_epoch_celaba_take(True)
|
test_pipeline_debug_mode_multi_epoch_celaba_take(True)
|
||||||
test_pipeline_debug_mode_multi_epoch_cifar10_take(True)
|
test_pipeline_debug_mode_multi_epoch_cifar10_take(True)
|
||||||
test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(True)
|
test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(True)
|
||||||
test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(True)
|
test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(True)
|
||||||
test_pipeline_debug_mode_multi_epoch_cifar10_zip(True)
|
test_pipeline_debug_mode_multi_epoch_cifar10_zip(True)
|
||||||
test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(True)
|
test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(True)
|
||||||
test_pipeline_debug_mode_multi_epoch_imagefolder_batch(True, False, 10)
|
test_pipeline_debug_mode_multi_epoch_imagefolder(True, plot=True)
|
||||||
|
test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle(True, True, plot=True)
|
||||||
|
test_pipeline_debug_mode_multi_epoch_imagefolder_repeat(True, plot=True)
|
||||||
|
test_pipeline_debug_mode_multi_ep_im_batch_no_remainder(True, True, 7, plot=True)
|
||||||
|
test_pipeline_debug_mode_multi_ep_im_batch_with_remainders(True, False, 7, plot=True)
|
||||||
|
|