[MD] Debug Mode - Mappable Op Row Content Order Fix and UTs

Author: Cathy Wong
Date: 2023-01-27 21:12:18 -05:00
Parent: fdb3e202a2
Commit: 3c1b2676e3
17 changed files with 439 additions and 38 deletions

@@ -183,15 +183,16 @@ Status MappableLeafOp::GetNextRowPullMode(TensorRow *const row) {
     RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
     CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
     sample_ids_ = sample_row[0];
+    MS_LOG(DEBUG) << "Set sample_ids_=" << (*sample_ids_);
   }
   if (curr_row_ + 1 > sample_ids_->Size()) {
     *row = TensorRow(TensorRow::kFlagEOE);
     RETURN_IF_NOT_OK(ResetAndUpdateRepeat());
-    RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
     return Status::OK();
   }
   int64_t key;
   RETURN_IF_NOT_OK(sample_ids_->GetItemAt(&key, {curr_row_}));
+  MS_LOG(DEBUG) << "Got key=" << key << " with curr_row_=" << curr_row_;
   RETURN_IF_NOT_OK(LoadTensorRowPullMode(key, row));
   curr_row_++;
   return Status::OK();
@@ -200,6 +201,12 @@ Status MappableLeafOp::GetNextRowPullMode(TensorRow *const row) {
 Status MappableLeafOp::ResetAndUpdateRepeat() {
   if (!IsLastIteration()) {
     RETURN_IF_NOT_OK(Reset());
+    TensorRow sample_row;
+    RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
+    CHECK_FAIL_RETURN_UNEXPECTED(sample_row.size() > 0, "GetNextRowPullMode: Expect at least one sample in sampler.");
+    // Get sample_ids
+    sample_ids_ = sample_row[0];
+    MS_LOG(DEBUG) << "Set sample_ids_=" << (*sample_ids_);
     UpdateRepeatAndEpochCounter();
   } else {
     eof_handled_ = true;
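
Note: the pull-mode path patched above only runs when dataset debug mode is enabled. A minimal pipeline that exercises it might look like the sketch below; the dataset path, seed value, and iterator arguments are taken from the new tests in this commit, while the variable names and the print statement are purely illustrative and not part of the patch.

import mindspore.dataset as ds

# Debug mode executes the pipeline single-threaded in pull mode, i.e. through the
# MappableLeafOp::GetNextRowPullMode / ResetAndUpdateRepeat code changed above.
ds.config.set_debug_mode(True)
ds.config.set_seed(899)

data = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train", decode=True)

num_epochs = 2
iterator = data.create_dict_iterator(num_epochs=num_epochs, output_numpy=True)
for _ in range(num_epochs):
    for row in iterator:
        # Each row is a dict with "image" and "label" columns; with the fix,
        # ResetAndUpdateRepeat refreshes sample_ids_ from the sampler at the end
        # of an epoch, so the next epoch iterates the re-sampled order.
        print(row["label"], row["image"].shape)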

@@ -48,7 +48,7 @@ TEST_F(MindDataTestPipeline, TestCelebADataset) {
   ASSERT_OK(iter->GetNextRow(&row));
   // Check if CelebA() read correct images/attr
-  std::string expect_file[] = {"1.JPEG", "2.jpg"};
+  std::string expect_file[] = {"1_apple.JPEG", "2_banana.jpg"};
   std::vector<std::vector<uint32_t>> expect_attr_vector = {
     {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
      0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},

Binary image file changed (155 KiB before, 155 KiB after); content not shown.

Binary image file changed (169 KiB before, 169 KiB after); content not shown.

New binary image file added (451 KiB); content not shown.

New binary image file added (457 KiB); content not shown.

@@ -1,6 +1,6 @@
 4
 5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes Bald Bangs Big_Lips Big_Nose Black_Hair Blond_Hair Blurry Brown_Hair Bushy_Eyebrows Chubby Double_Chin Eyeglasses Goatee Gray_Hair Heavy_Makeup High_Cheekbones Male Mouth_Slightly_Open Mustache Narrow_Eyes No_Beard Oval_Face Pale_Skin Pointy_Nose Receding_Hairline Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair Wearing_Earrings Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young
-1.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
-2.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
-2.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
-1.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
+1_apple.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1
+2_banana.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
+3_lemon.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1
+4_avacado.JPEG -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1

@@ -1,4 +1,4 @@
-1.JPEG 0
-2.jpeg 1
-2.jpeg 2
-2.jpeg 0
+1_apple.JPEG 0
+2_banana.jpg 1
+3_lemon.jpg 2
+4_avacado.JPEG 0

@@ -0,0 +1,16 @@
+{
+  "datasetType": "IMAGENET",
+  "numRows": 7,
+  "columns": {
+    "image": {
+      "type": "uint8",
+      "rank": 1,
+      "t_impl": "cvmat"
+    },
+    "label" : {
+      "type": "uint32",
+      "rank": 0,
+      "t_impl" : "flex"
+    }
+  }
+}

Seven new binary image files added (18 KiB, 12 KiB, 13 KiB, 11 KiB, 27 KiB, 16 KiB, and 9.9 KiB); contents not shown.

@@ -15,19 +15,74 @@
 """
 Test multiple epoch scenarios in debug mode
 """
+import math
+import numpy as np
 import pytest
 import mindspore.dataset as ds
 from mindspore import log as logger
-from util import config_get_set_seed
+from util import config_get_set_seed, visualize_list, config_get_set_num_parallel_workers

 pytestmark = pytest.mark.forked


+@pytest.mark.parametrize("my_debug_mode", (False, True))
+def test_pipeline_debug_mode_multi_epoch_celaba(my_debug_mode, plot=False):
+    """
+    Feature: Pipeline debug mode.
+    Description: Test multiple epoch scenario using CelebADataset.
+    Expectation: Output is equal to the expected output
+    """
+    logger.info("test_pipeline_debug_mode_multi_epoch_celaba")
+    # Set configuration
+    original_seed = config_get_set_seed(99)
+    original_num_workers = config_get_set_num_parallel_workers(1)
+    if my_debug_mode:
+        debug_mode_original = ds.config.get_debug_mode()
+        ds.config.set_debug_mode(True)
+    # testCelebAData has 4 samples
+    num_samples = 4
+    data1 = ds.CelebADataset("../data/dataset/testCelebAData/", decode=True)
+    # Confirm dataset size
+    assert data1.get_dataset_size() == num_samples
+    num_epoch = 2
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
+    epoch_count = 0
+    sample_count = 0
+    image_list = []
+    for _ in range(num_epoch):
+        row_count = 0
+        for row_item in iter1:
+            # Note: Each row in this CelebADataset pipeline has columns "image" and "attr"
+            assert len(row_item) == 2
+            assert row_item["image"].shape == (2268, 4032, 3)
+            image = row_item["image"]
+            image_list.append(image)
+            row_count += 1
+        assert row_count == num_samples
+        epoch_count += 1
+        sample_count += row_count
+    assert epoch_count == num_epoch
+    assert sample_count == num_samples * num_epoch
+    if plot:
+        visualize_list(image_list)
+    # Restore configuration
+    ds.config.set_seed(original_seed)
+    ds.config.set_num_parallel_workers(original_num_workers)
+    if my_debug_mode:
+        ds.config.set_debug_mode(debug_mode_original)


 @pytest.mark.parametrize("my_debug_mode", (False, True))
 def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
     """
     Feature: Pipeline debug mode.
-    Description: Test creating tuple iterator in CelebA dataset with take op and multi epochs.
+    Description: Test multiple epoch scenario using CelebADataset with take op.
     Expectation: Output is equal to the expected output
     """
     logger.info("test_pipeline_debug_mode_multi_epoch_celaba_take")
@@ -45,14 +100,15 @@ def test_pipeline_debug_mode_multi_epoch_celaba_take(my_debug_mode):
     data1 = data1.take(num_take)

     num_epoch = 2
-    # Use create_tuple_iterator
-    iter1 = data1.create_tuple_iterator(num_epochs=num_epoch)
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
     epoch_count = 0
     sample_count = 0
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
-            # in this example, each row has columns "image" and "label"
+        for row_item in iter1:
+            # Note: Each row in this CelebADataset pipeline has columns "image" and "attr"
+            assert len(row_item) == 2
+            assert row_item["image"].shape == (2268, 4032, 3)
             row_count += 1
         assert row_count == num_take
         epoch_count += 1
@@ -100,7 +156,9 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_take(my_debug_mode):
     sample_count = 0
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
+        for row_item in iter1:
+            image = row_item["image"]
+            assert image.shape == (32, 32, 3)
             row_count += 1
         assert row_count == num_take
         epoch_count += 1
@@ -130,29 +188,47 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(my_debug_mode):
         ds.config.set_debug_mode(True)

     data_dir_10 = "../data/dataset/testCifar10Data"
-    num_samples = 100
+    num_samples = 40
     num_repeat = 2
-    batch_size = 32
+    batch_size = 16
     data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
     # Add repeat then batch
     data1 = data1.repeat(num_repeat)
     data1 = data1.batch(batch_size, True)

-    num_epoch = 5
-    # Use create_tuple_iterator
-    iter1 = data1.create_tuple_iterator(num_epochs=num_epoch)
+    num_epoch = 2
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
     epoch_count = 0
     sample_count = 0
+    label_list = []
+    label_golden = [[0, 7, 8, 4, 9, 1, 9, 8, 6, 2, 7, 0, 2, 1, 7, 0],
+                    [1, 4, 4, 7, 9, 4, 7, 8, 3, 4, 3, 9, 4, 7, 3, 9],
+                    [0, 0, 5, 6, 6, 6, 5, 4, 7, 6, 9, 0, 3, 4, 3, 7],
+                    [2, 4, 0, 3, 1, 7, 7, 9, 9, 8, 7, 0, 6, 3, 7, 9],
+                    [0, 8, 7, 8, 1, 6, 4, 2, 6, 4, 9, 5, 3, 2, 5, 9],
+                    [2, 5, 3, 6, 2, 0, 7, 8, 3, 9, 6, 2, 4, 1, 5, 4],
+                    [5, 0, 9, 8, 6, 4, 9, 2, 0, 0, 0, 2, 7, 2, 6, 4],
+                    [6, 2, 7, 7, 2, 6, 9, 2, 8, 5, 7, 6, 6, 4, 6, 2],
+                    [0, 7, 2, 9, 8, 6, 7, 6, 0, 3, 2, 2, 6, 8, 8, 2],
+                    [4, 6, 6, 9, 8, 4, 6, 1, 0, 5, 5, 9, 2, 0, 8, 7]]
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (batch_size, 32, 32, 3)
+            label_list.append(label)
+            label_list_per_epoch.append(label)
             row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
         assert row_count == int(num_samples * num_repeat / batch_size)
         epoch_count += 1
         sample_count += row_count
     assert epoch_count == num_epoch
     assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
+    np.testing.assert_array_equal(label_list, np.array(label_golden))

     # Restore configuration
     ds.config.set_seed(original_seed)
@@ -190,8 +266,14 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(my_debug_mode):
     sample_count = 0
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (batch_size, 32, 32, 3)
+            label_list_per_epoch.append(label)
             row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
         assert row_count == int(num_samples * num_repeat / batch_size)
         epoch_count += 1
         sample_count += row_count
@@ -242,8 +324,14 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip(my_debug_mode):
     sample_count = 0
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (batch_size, 32, 32, 3)
+            label_list_per_epoch.append(label)
             row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
         assert row_count == int(num_samples * num_repeat / batch_size)
         epoch_count += 1
         sample_count += row_count
@@ -274,7 +362,7 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
     data_dir_10 = "../data/dataset/testCifar10Data"
     num_samples = 20
     batch_size = 10
-    num_repeat = 5
+    num_repeat = 3
     data1 = ds.Cifar10Dataset(data_dir_10, num_samples=num_samples)
@@ -292,15 +380,36 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
     iter1 = data3.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
     epoch_count = 0
     sample_count = 0
+    label_list = []
+    label_golden = [[5, 1, 3, 6, 2, 7, 5, 2, 1, 9],
+                    [3, 0, 9, 1, 1, 2, 5, 5, 6, 3],
+                    [5, 5, 8, 0, 8, 5, 4, 7, 2, 2],
+                    [2, 2, 4, 8, 1, 1, 3, 0, 5, 8],
+                    [1, 1, 0, 5, 5, 5, 8, 4, 4, 1],
+                    [8, 2, 9, 0, 8, 1, 6, 0, 1, 8],
+                    [7, 0, 6, 1, 6, 2, 7, 4, 2, 3],
+                    [9, 8, 0, 2, 7, 4, 1, 9, 8, 3],
+                    [7, 0, 2, 6, 2, 0, 2, 0, 7, 0],
+                    [4, 7, 7, 7, 6, 5, 3, 4, 5, 9],
+                    [1, 9, 7, 5, 7, 7, 2, 2, 9, 2],
+                    [8, 8, 5, 1, 4, 0, 5, 5, 6, 6]]
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (batch_size, 32, 32, 3)
+            label_list.append(label)
+            label_list_per_epoch.append(label)
             row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
         assert row_count == int(num_samples * num_repeat / batch_size)
         epoch_count += 1
         sample_count += row_count
     assert epoch_count == num_epoch
     assert sample_count == int(num_samples * num_repeat / batch_size) * num_epoch
+    np.testing.assert_array_equal(label_list, np.array(label_golden))

     # Restore configuration
     ds.config.set_seed(original_seed)
@@ -308,53 +417,322 @@ def test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(my_debug_mode)
         ds.config.set_debug_mode(debug_mode_original)


-@pytest.mark.parametrize("my_debug_mode, my_drop_remainder, my_num_samples",
-                         [(False, False, 40), (True, False, 40), (True, True, 43)])
-def test_pipeline_debug_mode_multi_epoch_imagefolder_batch(my_debug_mode, my_drop_remainder, my_num_samples):
+@pytest.mark.parametrize("my_debug_mode", (False, True))
+def test_pipeline_debug_mode_multi_epoch_imagefolder(my_debug_mode, plot=False):
     """
     Feature: Pipeline debug mode.
-    Description: Test multiple epoch scenario using ImageFolderDataset with batch op.
+    Description: Test multiple epoch scenario using ImageFolderDataset. Plot support provided.
     Expectation: Output is equal to the expected output
     """
-    logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_batch")
+    logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder")
     # Set configuration
-    original_seed = config_get_set_seed(799)
+    original_seed = config_get_set_seed(899)
+    original_num_workers = config_get_set_num_parallel_workers(1)
+    if my_debug_mode:
+        debug_mode_original = ds.config.get_debug_mode()
+        ds.config.set_debug_mode(True)
+    # Note: testImageNetData4 has 7 samples in total
+    num_samples = 7
+    # Use all 7 samples from the dataset
+    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
+                                  decode=True)
+    # Confirm dataset size
+    assert data1.get_dataset_size() == num_samples
+    num_epoch = 4
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
+    epoch_count = 0
+    sample_count = 0
+    image_list = []
+    label_list = []
+    label_golden = [4, 3, 5, 0, 1, 2, 6] + [3, 2, 5, 1, 0, 6, 4] + [6, 0, 1, 2, 5, 4, 3] + [3, 4, 5, 1, 0, 6, 2]
+    for _ in range(num_epoch):
+        row_count = 0
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (384, 682, 3)
+            image_list.append(image)
+            label_list.append(label)
+            label_list_per_epoch.append(label)
+            row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
+        assert row_count == num_samples
+        epoch_count += 1
+        sample_count += row_count
+    assert epoch_count == num_epoch
+    assert sample_count == num_samples * num_epoch
+    assert label_list == label_golden
+    if plot:
+        visualize_list(image_list)
+    # Restore configuration
+    ds.config.set_seed(original_seed)
+    ds.config.set_num_parallel_workers(original_num_workers)
+    if my_debug_mode:
+        ds.config.set_debug_mode(debug_mode_original)


+@pytest.mark.parametrize("my_debug_mode, my_shuffle", [(False, True), (True, None), (True, True), (True, False)])
+def test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle(my_debug_mode, my_shuffle, plot=False):
+    """
+    Feature: Pipeline debug mode.
+    Description: Test multiple epoch scenario using ImageFolderDataset with shuffle parameter. Plot support provided.
+    Expectation: Output is equal to the expected output
+    """
+    logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle")
+    # Set configuration
+    original_seed = config_get_set_seed(899)
+    original_num_workers = config_get_set_num_parallel_workers(1)
+    if my_debug_mode:
+        debug_mode_original = ds.config.get_debug_mode()
+        ds.config.set_debug_mode(True)
+    num_samples = 5
+    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
+                                  shuffle=my_shuffle,
+                                  num_samples=num_samples,
+                                  decode=True)
+    num_epoch = 2
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
+    epoch_count = 0
+    sample_count = 0
+    image_list = []
+    label_list = []
+    if my_shuffle is False:
+        # Sequential order is used
+        label_golden = list(range(0, 5)) * num_epoch
+    else:
+        # Random order is used, according to the seed value
+        label_golden = [2, 1, 3, 0, 4] + [3, 6, 2, 0, 3]
+    for _ in range(num_epoch):
+        row_count = 0
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (384, 682, 3)
+            image_list.append(image)
+            label_list.append(label)
+            label_list_per_epoch.append(label)
+            row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
+        assert row_count == num_samples
+        epoch_count += 1
+        sample_count += row_count
+    assert epoch_count == num_epoch
+    assert sample_count == num_samples * num_epoch
+    assert label_list == label_golden
+    if plot:
+        visualize_list(image_list)
+    # Restore configuration
+    ds.config.set_seed(original_seed)
+    ds.config.set_num_parallel_workers(original_num_workers)
+    if my_debug_mode:
+        ds.config.set_debug_mode(debug_mode_original)


+@pytest.mark.parametrize("my_debug_mode", (False, True))
+def test_pipeline_debug_mode_multi_epoch_imagefolder_repeat(my_debug_mode, plot=False):
+    """
+    Feature: Pipeline debug mode.
+    Description: Test multiple epoch scenario using ImageFolderDataset with repeat op. Plot support provided.
+    Expectation: Output is equal to the expected output
+    """
+    logger.info("test_pipeline_debug_mode_multi_epoch_imagefolder_repeat")
+    # Set configuration
+    original_seed = config_get_set_seed(899)
+    original_num_workers = config_get_set_num_parallel_workers(1)
+    if my_debug_mode:
+        debug_mode_original = ds.config.get_debug_mode()
+        ds.config.set_debug_mode(True)
+    num_samples = 5
+    num_repeat = 3
+    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
+                                  shuffle=True,
+                                  num_samples=num_samples,
+                                  decode=True)
+    data1 = data1.repeat(num_repeat)
+    num_epoch = 2
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
+    epoch_count = 0
+    sample_count = 0
+    image_list = []
+    label_list = []
+    # Random order is used, according to the seed value
+    label_golden = [2, 1, 3, 0, 4] + [3, 6, 2, 0, 3] + \
+                   [5, 4, 0, 1, 0] + [0, 0, 1, 3, 5] + \
+                   [4, 5, 5, 2, 0] + [1, 2, 4, 4, 5]
+    for _ in range(num_epoch):
+        row_count = 0
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (384, 682, 3)
+            image_list.append(image)
+            label_list.append(label)
+            label_list_per_epoch.append(label)
+            row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
+        assert row_count == num_samples * num_repeat
+        epoch_count += 1
+        sample_count += row_count
+    assert epoch_count == num_epoch
+    assert sample_count == num_samples * num_repeat * num_epoch
+    assert label_list == label_golden
+    if plot:
+        visualize_list(image_list)
+    # Restore configuration
+    ds.config.set_seed(original_seed)
+    ds.config.set_num_parallel_workers(original_num_workers)
+    if my_debug_mode:
+        ds.config.set_debug_mode(debug_mode_original)


+@pytest.mark.parametrize("my_debug_mode, my_drop, my_num_samples",
+                         [(False, False, 6), (True, False, 6), (True, True, 7)])
+def test_pipeline_debug_mode_multi_ep_im_batch_no_remainder(my_debug_mode, my_drop, my_num_samples, plot=False):
+    """
+    Feature: Pipeline debug mode.
+    Description: Test multiple epoch scenario using ImageFolderDataset with batch op and no remainder.
+    Expectation: Output is equal to the expected output
+    """
+    logger.info("test_pipeline_debug_mode_multi_ep_im_batch_no_remainder")
+    # Set configuration
+    original_seed = config_get_set_seed(899)
+    original_num_workers = config_get_set_num_parallel_workers(1)
     if my_debug_mode:
         debug_mode_original = ds.config.get_debug_mode()
         ds.config.set_debug_mode(True)
     num_samples = my_num_samples
-    batch_size = 5
-    data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", num_samples=num_samples)
-    data1 = data1.batch(batch_size, drop_remainder=my_drop_remainder)
+    batch_size = 2
+    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
+                                  num_samples=num_samples,
+                                  decode=True)
+    data1 = data1.batch(batch_size, drop_remainder=my_drop)
     num_epoch = 3
     iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
     epoch_count = 0
     sample_count = 0
+    image_list = []
+    label_list = []
+    label_golden = [[2, 1], [3, 0], [4, 6]] + [[3, 6], [2, 0], [3, 5]] + [[5, 4], [0, 1], [0, 6]]
     for _ in range(num_epoch):
         row_count = 0
-        for _ in iter1:
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (2, 384, 682, 3)
+            image_list.append(image[0])
+            label_list.append(label)
+            label_list_per_epoch.append(label)
             row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
         assert row_count == int(num_samples / batch_size)
         epoch_count += 1
         sample_count += row_count
     assert epoch_count == num_epoch
     assert sample_count == int(num_samples / batch_size) * num_epoch
+    np.testing.assert_array_equal(label_list, np.array(label_golden))
+    if plot:
+        visualize_list(image_list)
     # Restore configuration
     ds.config.set_seed(original_seed)
+    ds.config.set_num_parallel_workers(original_num_workers)
+    if my_debug_mode:
+        ds.config.set_debug_mode(debug_mode_original)


+@pytest.mark.parametrize("my_debug_mode, my_drop, my_num_samples",
+                         [(False, False, 7), (True, False, 7)])
+def test_pipeline_debug_mode_multi_ep_im_batch_with_remainders(my_debug_mode, my_drop, my_num_samples, plot=False):
+    """
+    Feature: Pipeline debug mode.
+    Description: Test multiple epoch scenario using ImageFolderDataset with batch op and remainder.
+    Expectation: Output is equal to the expected output
+    """
+    logger.info("test_pipeline_debug_mode_multi_ep_im_batch_with_remainders")
+    # Set configuration
+    original_seed = config_get_set_seed(899)
+    original_num_workers = config_get_set_num_parallel_workers(1)
+    if my_debug_mode:
+        debug_mode_original = ds.config.get_debug_mode()
+        ds.config.set_debug_mode(True)
+    num_samples = my_num_samples
+    batch_size = 2
+    data1 = ds.ImageFolderDataset("../data/dataset/testImageNetData4/train",
+                                  num_samples=num_samples,
+                                  decode=True)
+    data1 = data1.batch(batch_size, drop_remainder=my_drop)
+    num_epoch = 3
+    iter1 = data1.create_dict_iterator(num_epochs=num_epoch, output_numpy=True)
+    epoch_count = 0
+    sample_count = 0
+    image_list = []
+    label_list = []
+    label_golden = [[2, 1], [3, 0], [4, 6], [5]] + [[3, 6], [2, 0], [3, 5], [1]] + [[5, 4], [0, 1], [0, 6], [6]]
+    for _ in range(num_epoch):
+        row_count = 0
+        label_list_per_epoch = []
+        for row_item in iter1:
+            image = row_item["image"]
+            label = row_item["label"]
+            assert image.shape == (2, 384, 682, 3) or (1, 384, 682, 3)
+            image_list.append(image[0])
+            label_list.append(list(label))
+            label_list_per_epoch.append(list(label))
+            row_count += 1
+        logger.info("epoch_count is {}, label_list_per_epoch is {}".format(epoch_count, label_list_per_epoch))
+        assert row_count == math.ceil(num_samples / batch_size)
+        epoch_count += 1
+        sample_count += row_count
+    assert epoch_count == num_epoch
+    assert sample_count == math.ceil(num_samples / batch_size) * num_epoch
+    assert label_list == label_golden
+    if plot:
+        visualize_list(image_list)
+    # Restore configuration
+    ds.config.set_seed(original_seed)
+    ds.config.set_num_parallel_workers(original_num_workers)
     if my_debug_mode:
         ds.config.set_debug_mode(debug_mode_original)


 if __name__ == '__main__':
+    test_pipeline_debug_mode_multi_epoch_celaba(True, plot=True)
     test_pipeline_debug_mode_multi_epoch_celaba_take(True)
     test_pipeline_debug_mode_multi_epoch_cifar10_take(True)
     test_pipeline_debug_mode_multi_epoch_cifar10_repeat_batch(True)
     test_pipeline_debug_mode_multi_epoch_cifar10_batch_repeat(True)
     test_pipeline_debug_mode_multi_epoch_cifar10_zip(True)
     test_pipeline_debug_mode_multi_epoch_cifar10_zip_batch_repeat(True)
-    test_pipeline_debug_mode_multi_epoch_imagefolder_batch(True, False, 10)
+    test_pipeline_debug_mode_multi_epoch_imagefolder(True, plot=True)
+    test_pipeline_debug_mode_multi_epoch_imagefolder_shuffle(True, True, plot=True)
+    test_pipeline_debug_mode_multi_epoch_imagefolder_repeat(True, plot=True)
+    test_pipeline_debug_mode_multi_ep_im_batch_no_remainder(True, True, 7, plot=True)
+    test_pipeline_debug_mode_multi_ep_im_batch_with_remainders(True, False, 7, plot=True)
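
The row-count assertions in the batch tests above reduce to simple arithmetic on num_samples, num_repeat, and batch_size. A standalone sanity check using the same numbers as the tests (plain Python, no MindSpore required; purely illustrative) is:

import math

# drop_remainder=True keeps only full batches: 7 samples with batch_size=2 -> 3 rows per epoch
assert int(7 / 2) == 3
# drop_remainder=False also keeps the partial batch: 7 samples with batch_size=2 -> 4 rows per epoch
assert math.ceil(7 / 2) == 4
# repeat(2) before batch(16) on 40 samples -> 80 samples -> 5 full batches per epoch
assert int(40 * 2 / 16) == 5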