From d5e896b51ca20fe0cb7c65b9a6c7221d41be0e5b Mon Sep 17 00:00:00 2001
From: ms_yan <6576637+ms_yan@user.noreply.gitee.com>
Date: Fri, 22 May 2020 17:07:21 +0800
Subject: [PATCH] delete StorageDataset op API and its test cases

---
 mindspore/dataset/__init__.py                 |   4 +-
 mindspore/dataset/engine/__init__.py          |   2 +-
 mindspore/dataset/engine/datasets.py          | 177 +-----------------
 mindspore/dataset/engine/iterators.py         |   2 -
 .../dataset/engine/serializer_deserializer.py |   6 +-
 tests/st/ops/ascend/test_tdt_data_ms.py       |   2 +-
 .../test_datasets_imagenet_distribution.py    |  60 ------
 tests/ut/python/dataset/test_readdir.py       |  69 -------
 .../ut/python/dataset/test_serdes_dataset.py  |   4 +-
 tests/ut/python/dataset/test_storage.py       |  51 -----
 10 files changed, 11 insertions(+), 366 deletions(-)
 delete mode 100644 tests/ut/python/dataset/test_readdir.py
 delete mode 100644 tests/ut/python/dataset/test_storage.py

diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py
index 93c1a6e0472..0631ade36aa 100644
--- a/mindspore/dataset/__init__.py
+++ b/mindspore/dataset/__init__.py
@@ -19,7 +19,7 @@ can also create samplers with this module to sample data.
 """
 
 from .core.configuration import config
-from .engine.datasets import StorageDataset, TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \
+from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \
     GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CelebADataset, TextFileDataset, \
     Schema, Shuffle, zip, RandomDataset
 from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \
@@ -27,7 +27,7 @@ from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, Seque
 from .engine.serializer_deserializer import serialize, deserialize, show
 from .engine.graphdata import GraphData
 
-__all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", "StorageDataset",
+__all__ = ["config", "ImageFolderDatasetV2", "MnistDataset",
            "MindDataset", "GeneratorDataset", "TFRecordDataset", "ManifestDataset",
            "Cifar10Dataset", "Cifar100Dataset", "CelebADataset",
            "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler",
diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py
index 86d29713324..59dca2f6812 100644
--- a/mindspore/dataset/engine/__init__.py
+++ b/mindspore/dataset/engine/__init__.py
@@ -29,7 +29,7 @@ from .samplers import *
 from ..core.configuration import config, ConfigurationManager
 
 
-__all__ = ["config", "ConfigurationManager", "zip", "StorageDataset",
+__all__ = ["config", "ConfigurationManager", "zip",
            "ImageFolderDatasetV2", "MnistDataset",
            "MindDataset", "GeneratorDataset", "TFRecordDataset", "ManifestDataset",
            "Cifar10Dataset", "Cifar100Dataset", "CelebADataset",
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 687e9ecb770..74647999054 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -22,7 +22,6 @@ import glob
 import json
 import math
 import os
-import random
 import uuid
 import multiprocessing
 import queue
@@ -40,7 +39,7 @@ from mindspore._c_expression import typing
 from mindspore import log as logger
 from . import samplers
 from .iterators import DictIterator, TupleIterator
-from .validators import check, check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
+from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
     check_rename, \
     check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \
     check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \
@@ -480,7 +479,7 @@ class Dataset:
         If input_columns not provided or empty, all columns will be used.
 
         Args:
-            predicate(callable): python callable which returns a boolean value.
+            predicate(callable): Python callable which returns a boolean value; if False, the element is filtered out.
             input_columns: (list[str], optional): List of names of the input columns, when default=None,
                 the predicate will be applied on all columns in the dataset.
             num_parallel_workers (int, optional): Number of workers to process the Dataset
@@ -899,7 +898,7 @@ def get_distribution(output_dataset):
             dev_id = 0
-            if isinstance(output_dataset, (StorageDataset, MindDataset)):
+            if isinstance(output_dataset, MindDataset):
                 return output_dataset.distribution, dev_id
             if isinstance(output_dataset, (Cifar10Dataset, Cifar100Dataset, GeneratorDataset, ImageFolderDatasetV2,
                                            ManifestDataset, MnistDataset, VOCDataset, CelebADataset)):
@@ -984,57 +983,6 @@
         """Create an Iterator over the dataset."""
         return self.create_tuple_iterator()
 
-    @staticmethod
-    def read_dir(dir_path, schema, columns_list=None, num_parallel_workers=None,
-                 deterministic_output=True, prefetch_size=None, shuffle=False, seed=None, distribution=""):
-        """
-        Append the path of all files in the dir_path to StorageDataset.
-
-        Args:
-            dir_path (str): Path to the directory that contains the dataset.
-            schema (str): Path to the json schema file.
-            columns_list (list[str], optional): List of columns to be read (default=None).
-                If not provided, read all columns.
-            num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel
-                (default=None).
-            deterministic_output (bool, optional): Whether the result of this dataset can be reproduced
-                or not (default=True). If True, performance might be affected.
-            prefetch_size (int, optional): Prefetch number of records ahead of the
-                user's request (default=None).
-            shuffle (bool, optional): Shuffle the list of files in the directory (default=False).
-            seed (int, optional): Create a random generator with a fixed seed. If set to None,
-                create a random seed (default=None).
-            distribution (str, optional): The path of distribution config file (default="").
-
-        Returns:
-            StorageDataset.
-
-        Raises:
-            ValueError: If dataset folder does not exist.
-            ValueError: If dataset folder permission denied.
- """ - logger.warning("WARN_DEPRECATED: The usage of read_dir is deprecated, please use TFRecordDataset with GLOB.") - - list_files = [] - - if not os.path.isdir(dir_path): - raise ValueError("The dataset folder does not exist!") - if not os.access(dir_path, os.R_OK): - raise ValueError("The dataset folder permission denied!") - - for root, _, files in os.walk(dir_path): - for file in files: - list_files.append(os.path.join(root, file)) - - list_files.sort() - - if shuffle: - rand = random.Random(seed) - rand.shuffle(list_files) - - return StorageDataset(list_files, schema, distribution, columns_list, num_parallel_workers, - deterministic_output, prefetch_size) - @property def input_indexs(self): return self._input_indexs @@ -1818,7 +1766,7 @@ class FilterDataset(DatasetOp): Args: input_dataset: Input Dataset to be mapped. - predicate: python callable which returns a boolean value. + predicate: python callable which returns a boolean value, if False then filter the element. input_columns: (list[str]): List of names of the input columns, when default=None, the predicate will be applied all columns in the dataset. num_parallel_workers (int, optional): Number of workers to process the Dataset @@ -2157,123 +2105,6 @@ class TransferDataset(DatasetOp): self.iterator = TupleIterator(self) -class StorageDataset(SourceDataset): - """ - A source dataset that reads and parses datasets stored on disk in various formats, including TFData format. - - Args: - dataset_files (list[str]): List of files to be read. - schema (str): Path to the json schema file. If numRows(parsed from schema) is not exist, read the full dataset. - distribution (str, optional): Path of distribution config file (default=""). - columns_list (list[str], optional): List of columns to be read (default=None, read all columns). - num_parallel_workers (int, optional): Number of parallel working threads (default=None). - deterministic_output (bool, optional): Whether the result of this dataset can be reproduced - or not (default=True). If True, performance might be affected. - prefetch_size (int, optional): Prefetch number of records ahead of the user's request (default=None). - - Raises: - RuntimeError: If schema file failed to read. - RuntimeError: If distribution file path is given but failed to read. 
- """ - - @check - def __init__(self, dataset_files, schema, distribution="", columns_list=None, num_parallel_workers=None, - deterministic_output=None, prefetch_size=None): - super().__init__(num_parallel_workers) - logger.warning("WARN_DEPRECATED: The usage of StorageDataset is deprecated, please use TFRecordDataset.") - self.dataset_files = dataset_files - try: - with open(schema, 'r') as load_f: - json.load(load_f) - except json.decoder.JSONDecodeError: - raise RuntimeError("Json decode error when load schema file") - except Exception: - raise RuntimeError("Schema file failed to load") - - if distribution != "": - try: - with open(distribution, 'r') as load_d: - json.load(load_d) - except json.decoder.JSONDecodeError: - raise RuntimeError("Json decode error when load distribution file") - except Exception: - raise RuntimeError("Distribution file failed to load") - if self.dataset_files is None: - schema = None - distribution = None - self.schema = schema - self.distribution = distribution - self.columns_list = columns_list - self.deterministic_output = deterministic_output - self.prefetch_size = prefetch_size - - def get_args(self): - args = super().get_args() - args["dataset_files"] = self.dataset_files - args["schema"] = self.schema - args["distribution"] = self.distribution - args["columns_list"] = self.columns_list - args["deterministic_output"] = self.deterministic_output - args["prefetch_size"] = self.prefetch_size - return args - - def get_dataset_size(self): - """ - Get the number of batches in an epoch. - - Return: - Number, number of batches. - """ - if self._dataset_size is None: - self._get_pipeline_info() - return self._dataset_size - - # manually set dataset_size as a temporary solution. - def set_dataset_size(self, value): - logger.warning("WARN_DEPRECATED: This method is deprecated. Please use get_dataset_size directly.") - if value >= 0: - self._dataset_size = value - else: - raise ValueError('set dataset_size with negative value {}'.format(value)) - - def num_classes(self): - """ - Get the number of classes in dataset. - - Return: - Number, number of classes. - - Raises: - ValueError: If dataset type is invalid. - ValueError: If dataset is not Imagenet dataset or manifest dataset. - RuntimeError: If schema file is given but failed to load. - """ - cur_dataset = self - while cur_dataset.input: - cur_dataset = cur_dataset.input[0] - if not hasattr(cur_dataset, "schema"): - raise ValueError("Dataset type is invalid") - # Only IMAGENET/MANIFEST support numclass - try: - with open(cur_dataset.schema, 'r') as load_f: - load_dict = json.load(load_f) - except json.decoder.JSONDecodeError: - raise RuntimeError("Json decode error when load schema file") - except Exception: - raise RuntimeError("Schema file failed to load") - if load_dict["datasetType"] != "IMAGENET" and load_dict["datasetType"] != "MANIFEST": - raise ValueError("%s dataset does not support num_classes!" 
-                             % (load_dict["datasetType"]))
-
-        if self._num_classes is None:
-            self._get_pipeline_info()
-        return self._num_classes
-
-    def is_shuffled(self):
-        return False
-
-    def is_sharded(self):
-        return False
-
-
 class RangeDataset(MappableDataset):
     """
diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py
index b778bdacae7..7cd69f30579 100644
--- a/mindspore/dataset/engine/iterators.py
+++ b/mindspore/dataset/engine/iterators.py
@@ -168,8 +168,6 @@ class Iterator:
             op_type = OpName.SKIP
         elif isinstance(dataset, de.TakeDataset):
            op_type = OpName.TAKE
-        elif isinstance(dataset, de.StorageDataset):
-            op_type = OpName.STORAGE
         elif isinstance(dataset, de.ImageFolderDatasetV2):
             op_type = OpName.IMAGEFOLDER
         elif isinstance(dataset, de.GeneratorDataset):
diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py
index c509ec10174..688ef167537 100644
--- a/mindspore/dataset/engine/serializer_deserializer.py
+++ b/mindspore/dataset/engine/serializer_deserializer.py
@@ -230,11 +230,7 @@ def create_node(node):
     pyobj = None
     # Find a matching Dataset class and call the constructor with the corresponding args.
     # When a new Dataset class is introduced, another if clause and parsing code needs to be added.
-    if dataset_op == 'StorageDataset':
-        pyobj = pyclass(node['dataset_files'], node['schema'], node.get('distribution'),
-                        node.get('columns_list'), node.get('num_parallel_workers'))
-
-    elif dataset_op == 'ImageFolderDatasetV2':
+    if dataset_op == 'ImageFolderDatasetV2':
         sampler = construct_sampler(node.get('sampler'))
         pyobj = pyclass(node['dataset_dir'], node.get('num_samples'), node.get('num_parallel_workers'),
                         node.get('shuffle'), sampler, node.get('extensions'),
diff --git a/tests/st/ops/ascend/test_tdt_data_ms.py b/tests/st/ops/ascend/test_tdt_data_ms.py
index e4e1b1cf734..889d4883f9c 100644
--- a/tests/st/ops/ascend/test_tdt_data_ms.py
+++ b/tests/st/ops/ascend/test_tdt_data_ms.py
@@ -31,7 +31,7 @@ SCHEMA_DIR = "{0}/resnet_all_datasetSchema.json".format(data_path)
 
 def test_me_de_train_dataset():
     data_list = ["{0}/train-00001-of-01024.data".format(data_path)]
-    data_set = ds.StorageDataset(data_list, schema=SCHEMA_DIR,
+    data_set = ds.TFRecordDataset(data_list, schema=SCHEMA_DIR,
                                  columns_list=["image/encoded", "image/class/label"])
 
     resize_height = 224
diff --git a/tests/ut/python/dataset/test_datasets_imagenet_distribution.py b/tests/ut/python/dataset/test_datasets_imagenet_distribution.py
index 65d432edaf8..e463424949a 100644
--- a/tests/ut/python/dataset/test_datasets_imagenet_distribution.py
+++ b/tests/ut/python/dataset/test_datasets_imagenet_distribution.py
@@ -24,11 +24,6 @@ DATA_DIR = ["../data/dataset/test_tf_file_3_images2/train-0000-of-0001.data",
 
 SCHEMA_DIR = "../data/dataset/test_tf_file_3_images2/datasetSchema.json"
 
-DISTRIBUTION_ALL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionAll.json"
-DISTRIBUTION_UNIQUE_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionUnique.json"
-DISTRIBUTION_RANDOM_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionRandom.json"
-DISTRIBUTION_EQUAL_DIR = "../data/dataset/test_tf_file_3_images2/dataDistributionEqualRows.json"
-
 
 def test_tf_file_normal():
     # apply dataset operations
@@ -42,61 +37,6 @@ def test_tf_file_normal():
     assert num_iter == 12
 
 
-def test_tf_file_distribution_all():
-    # apply dataset operations
-    data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_ALL_DIR)
-    data1 = data1.repeat(2)
-    num_iter = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        num_iter += 1
-
-    logger.info("Number of data in data1: {}".format(num_iter))
-    assert num_iter == 24
-
-
-def test_tf_file_distribution_unique():
-    data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_UNIQUE_DIR)
-    data1 = data1.repeat(1)
-    num_iter = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        num_iter += 1
-
-    logger.info("Number of data in data1: {}".format(num_iter))
-    assert num_iter == 4
-
-
-def test_tf_file_distribution_random():
-    data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_RANDOM_DIR)
-    data1 = data1.repeat(1)
-    num_iter = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        num_iter += 1
-
-    logger.info("Number of data in data1: {}".format(num_iter))
-    assert num_iter == 4
-
-
-def test_tf_file_distribution_equal_rows():
-    data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, DISTRIBUTION_EQUAL_DIR)
-    data1 = data1.repeat(2)
-    num_iter = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        num_iter += 1
-
-    assert num_iter == 4
-
-
 if __name__ == '__main__':
     logger.info('=======test normal=======')
     test_tf_file_normal()
-
-    logger.info('=======test all=======')
-    test_tf_file_distribution_all()
-
-    logger.info('=======test unique=======')
-    test_tf_file_distribution_unique()
-
-    logger.info('=======test random=======')
-    test_tf_file_distribution_random()
-    logger.info('=======test equal rows=======')
-    test_tf_file_distribution_equal_rows()
diff --git a/tests/ut/python/dataset/test_readdir.py b/tests/ut/python/dataset/test_readdir.py
deleted file mode 100644
index 12649c65977..00000000000
--- a/tests/ut/python/dataset/test_readdir.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-import mindspore.dataset as ds
-from mindspore import log as logger
-
-DATA_DIR = "../data/dataset/test_tf_file_3_images/data"
-SCHEMA = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
-COLUMNS = ["label"]
-GENERATE_GOLDEN = False
-
-
-def test_case_0():
-    logger.info("Test 0 readdir")
-
-    # apply dataset operations
-    data1 = ds.engine.Dataset.read_dir(DATA_DIR, SCHEMA, columns_list=None, num_parallel_workers=None,
-                                       deterministic_output=True, prefetch_size=None, shuffle=False, seed=None)
-
-    i = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        logger.info("item[label] is {}".format(item["label"]))
-        i = i + 1
-    assert (i == 3)
-
-
-def test_case_1():
-    logger.info("Test 1 readdir")
-
-    # apply dataset operations
-    data1 = ds.engine.Dataset.read_dir(DATA_DIR, SCHEMA, COLUMNS, num_parallel_workers=None,
-                                       deterministic_output=True, prefetch_size=None, shuffle=True, seed=None)
-
-    i = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        logger.info("item[label] is {}".format(item["label"]))
-        i = i + 1
-    assert (i == 3)
-
-
-def test_case_2():
-    logger.info("Test 2 readdir")
-
-    # apply dataset operations
-    data1 = ds.engine.Dataset.read_dir(DATA_DIR, SCHEMA, columns_list=None, num_parallel_workers=2,
-                                       deterministic_output=False, prefetch_size=16, shuffle=True, seed=10)
-
-    i = 0
-    for item in data1.create_dict_iterator():  # each data is a dictionary
-        logger.info("item[label] is {}".format(item["label"]))
-        i = i + 1
-    assert (i == 3)
-
-
-if __name__ == "__main__":
-    test_case_0()
-    test_case_1()
-    test_case_2()
diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py
index 1fe4d7cb072..121cca437ab 100644
--- a/tests/ut/python/dataset/test_serdes_dataset.py
+++ b/tests/ut/python/dataset/test_serdes_dataset.py
@@ -177,7 +177,7 @@ def test_random_crop():
     SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
 
     # First dataset
-    data1 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
+    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
     decode_op = vision.Decode()
     random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
     data1 = data1.map(input_columns="image", operations=decode_op)
@@ -192,7 +192,7 @@ def test_random_crop():
     data1_1 = ds.deserialize(input_dict=ds1_dict)
 
     # Second dataset
-    data2 = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
+    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
     data2 = data2.map(input_columns="image", operations=decode_op)
 
     for item1, item1_1, item2 in zip(data1.create_dict_iterator(), data1_1.create_dict_iterator(),
diff --git a/tests/ut/python/dataset/test_storage.py b/tests/ut/python/dataset/test_storage.py
deleted file mode 100644
index 92a689a6897..00000000000
--- a/tests/ut/python/dataset/test_storage.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from util import save_and_check
-
-import mindspore.dataset as ds
-from mindspore import log as logger
-
-DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
-SCHEMA_DIR = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
-COLUMNS = ["col_1d", "col_2d", "col_3d", "col_binary", "col_float",
-           "col_sint16", "col_sint32", "col_sint64"]
-GENERATE_GOLDEN = False
-
-
-def test_case_storage():
-    """
-    test StorageDataset
-    """
-    logger.info("Test Simple StorageDataset")
-    # define parameters
-    parameters = {"params": {}}
-
-    # apply dataset operations
-    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
-
-    filename = "storage_result.npz"
-    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
-
-
-def test_case_no_rows():
-    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
-    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetNoRowsSchema.json"
-
-    dataset = ds.StorageDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
-    assert dataset.get_dataset_size() == 3
-    count = 0
-    for data in dataset.create_tuple_iterator():
-        count += 1
-    assert count == 3
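
-- 
Migration note: both removed entry points already pointed callers at
TFRecordDataset in their WARN_DEPRECATED messages. A minimal sketch of the
replacement, reusing paths from the deleted tests above; the glob form follows
the old read_dir hint ("please use TFRecordDataset with GLOB"), and the
num_shards/shard_id mapping for the removed distribution-config file is an
assumption about the TFRecordDataset API, not something this patch verifies:

    import mindspore.dataset as ds

    DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
    SCHEMA_DIR = "../data/dataset/testTFTestAllTypes/datasetSchema.json"

    # StorageDataset(dataset_files, schema, columns_list=...) maps directly:
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["col_1d"])

    # Dataset.read_dir(dir_path, schema, ...) becomes a glob over the directory:
    data2 = ds.TFRecordDataset("../data/dataset/test_tf_file_3_images/data/*",
                               "../data/dataset/test_tf_file_3_images/datasetSchema.json")

    # The old distribution JSON has no direct argument here; sharding is
    # expressed with num_shards/shard_id instead (assumed API):
    data3 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_shards=4, shard_id=0)

    for item in data1.create_dict_iterator():  # each item is a dict of arrays
        print(item["col_1d"])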