diff --git a/mindspore/python/mindspore/dataset/engine/__init__.py b/mindspore/python/mindspore/dataset/engine/__init__.py
index ac104f232df..e1f0906f45d 100644
--- a/mindspore/python/mindspore/dataset/engine/__init__.py
+++ b/mindspore/python/mindspore/dataset/engine/__init__.py
@@ -26,15 +26,85 @@ from ..callback import DSCallback, WaitedDSCallback
 from ..core import config
 from .cache_client import DatasetCache
 from .datasets import *
+from .datasets_vision import *
+from .datasets_text import *
+from .datasets_audio import *
+from .datasets_standard_format import *
+from .datasets_user_defined import *
 from .graphdata import GraphData, SamplingStrategy, OutputFormat
 from .iterators import *
 from .samplers import *
 from .serializer_deserializer import compare, deserialize, serialize, show

-__all__ = ["CelebADataset", "Cifar100Dataset", "Cifar10Dataset", "CLUEDataset", "CocoDataset", "CSVDataset",
-           "GeneratorDataset", "GraphData", "ImageFolderDataset", "ManifestDataset", "MindDataset", "MnistDataset",
-           "NumpySlicesDataset", "PaddedDataset", "TextFileDataset", "TFRecordDataset", "VOCDataset",
-           "DistributedSampler", "PKSampler", "RandomSampler", "SequentialSampler", "SubsetRandomSampler",
-           "WeightedRandomSampler", "SubsetSampler",
-           "DatasetCache", "DSCallback", "Schema", "WaitedDSCallback", "compare", "deserialize",
-           "serialize", "show", "zip"]
+__all__ = ["Caltech101Dataset",        # vision dataset
+           "Caltech256Dataset",        # vision dataset
+           "CelebADataset",            # vision dataset
+           "Cifar10Dataset",           # vision dataset
+           "Cifar100Dataset",          # vision dataset
+           "CityscapesDataset",        # vision dataset
+           "CocoDataset",              # vision dataset
+           "DIV2KDataset",             # vision dataset
+           "EMnistDataset",            # vision dataset
+           "FakeImageDataset",         # vision dataset
+           "FashionMnistDataset",      # vision dataset
+           "FlickrDataset",            # vision dataset
+           "Flowers102Dataset",        # vision dataset
+           "ImageFolderDataset",       # vision dataset
+           "KMnistDataset",            # vision dataset
+           "ManifestDataset",          # vision dataset
+           "MnistDataset",             # vision dataset
+           "PhotoTourDataset",         # vision dataset
+           "Places365Dataset",         # vision dataset
+           "QMnistDataset",            # vision dataset
+           "RandomDataset",            # vision dataset
+           "SBDataset",                # vision dataset
+           "SBUDataset",               # vision dataset
+           "SemeionDataset",           # vision dataset
+           "STL10Dataset",             # vision dataset
+           "SVHNDataset",              # vision dataset
+           "USPSDataset",              # vision dataset
+           "VOCDataset",               # vision dataset
+           "WIDERFaceDataset",         # vision dataset
+           "AGNewsDataset",            # text dataset
+           "AmazonReviewDataset",      # text dataset
+           "CLUEDataset",              # text dataset
+           "CoNLL2000Dataset",         # text dataset
+           "CSVDataset",               # text dataset
+           "DBpediaDataset",           # text dataset
+           "EnWik9Dataset",            # text dataset
+           "IMDBDataset",              # text dataset
+           "IWSLT2016Dataset",         # text dataset
+           "IWSLT2017Dataset",         # text dataset
+           "PennTreebankDataset",      # text dataset
+           "SogouNewsDataset",         # text dataset
+           "TextFileDataset",          # text dataset
+           "UDPOSDataset",             # text dataset
+           "WikiTextDataset",          # text dataset
+           "YahooAnswersDataset",      # text dataset
+           "YelpReviewDataset",        # text dataset
+           "LJSpeechDataset",          # audio dataset
+           "SpeechCommandsDataset",    # audio dataset
+           "TedliumDataset",           # audio dataset
+           "YesNoDataset",             # audio dataset
+           "MindDataset",              # standard format dataset
+           "TFRecordDataset",          # standard format dataset
+           "GeneratorDataset",         # user defined dataset
+           "NumpySlicesDataset",       # user defined dataset
+           "PaddedDataset",            # user defined dataset
+           "GraphData",                # graph data
+           "DistributedSampler",       # sampler
+           "RandomSampler",            # sampler
+           "SequentialSampler",        # sampler
+           "SubsetRandomSampler",      # sampler
+           "SubsetSampler",            # sampler
+           "PKSampler",                # sampler
+           "WeightedRandomSampler",    # sampler
+           "DatasetCache",
+           "DSCallback",
+           "WaitedDSCallback",
+           "Schema",
+           "compare",
+           "deserialize",
+           "serialize",
+           "show",
+           "zip"]
diff --git a/mindspore/python/mindspore/dataset/engine/datasets.py b/mindspore/python/mindspore/dataset/engine/datasets.py
index 219014a7caf..e66720068f9 100644
--- a/mindspore/python/mindspore/dataset/engine/datasets.py
+++ b/mindspore/python/mindspore/dataset/engine/datasets.py
@@ -19,7 +19,6 @@ high performance and parses data precisely. Some of the operations that are
 provided to users to preprocess data include shuffle, batch, repeat, map, and zip.
 """
 import atexit
-import builtins
 import glob
 import json
 import math
@@ -30,10 +29,7 @@ import time
 import uuid
 import multiprocessing
 from multiprocessing.pool import RUN, TERMINATE
-from multiprocessing.util import Finalize
-import queue
 from enum import Enum
-from functools import partial
 from importlib import import_module
 import sys
 import threading
@@ -43,44 +39,28 @@ import weakref
 import platform
 import psutil
 import numpy as np
-from scipy.io import loadmat
-from PIL import Image

 import mindspore._c_dataengine as cde
 from mindspore._c_expression import typing
-from mindspore.common import Tensor
 from mindspore import log as logger
 from mindspore.parallel._ps_context import _is_role_pserver, _is_role_sched
-from mindspore.parallel._utils import _get_device_num
 from mindspore.dataset.engine.offload import GetOffloadModel

 import mindspore.dataset.transforms.py_transforms as py_transforms
 from mindspore.dataset.text.utils import SentencePieceModel, DE_C_INTER_SENTENCEPIECE_MODE
+from mindspore.parallel._utils import _get_device_num
 from . import samplers
 from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
     ITERATORS_LIST, _unset_iterator_cleanup
 from .queue import _SharedQueue
 from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
-    check_rename, check_numpyslicesdataset, check_device_send, check_take, check_project, check_imagefolderdataset, \
-    check_mnist_cifar_dataset, check_manifestdataset, check_tfrecorddataset, check_vocdataset, check_cocodataset, \
-    check_celebadataset, check_minddataset, check_generatordataset, check_sync_wait, check_zip_dataset, \
-    check_add_column, check_textfiledataset, check_concat, check_random_dataset, check_split, \
-    check_bucket_batch_by_length, check_cluedataset, check_save, check_csvdataset, check_paddeddataset, \
-    check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_flickr_dataset, \
-    check_sb_dataset, check_flowers102dataset, check_cityscapes_dataset, check_usps_dataset, check_div2k_dataset, \
-    check_sbu_dataset, check_qmnist_dataset, check_emnist_dataset, check_fake_image_dataset, check_places365_dataset, \
-    check_photo_tour_dataset, check_ag_news_dataset, check_dbpedia_dataset, check_lj_speech_dataset, \
-    check_yes_no_dataset, check_speech_commands_dataset, check_tedlium_dataset, check_svhn_dataset, \
-    check_stl10_dataset, check_yelp_review_dataset, check_penn_treebank_dataset, check_iwslt2016_dataset, \
-    check_iwslt2017_dataset, check_sogou_news_dataset, check_yahoo_answers_dataset, check_udpos_dataset, \
-    check_conll2000_dataset, check_amazon_review_dataset, check_semeion_dataset, check_caltech101_dataset, \
-    check_caltech256_dataset, check_wiki_text_dataset, check_imdb_dataset, check_wider_face_dataset, \
-    check_en_wik9_dataset
-from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
-    get_prefetch_size
-from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist
+    check_rename, check_device_send, check_take, check_project, \
+    check_sync_wait, check_zip_dataset, check_add_column, check_concat, check_split, check_bucket_batch_by_length, \
+    check_save, check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send
+from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers
+from ..core.datatypes import mstype_to_detype
 from ..core.validator_helpers import replace_none
 from ..core.py_util_helpers import ExceptionHandler
 from ..transforms.py_transforms_util import FuncWrapper
@@ -333,6 +313,8 @@ class Dataset:
             for d in item.children:
                 temp.append(d)
                 op_name[str(d)] = operator_id
+
+                from mindspore.dataset.engine.datasets_user_defined import GeneratorDataset
                 if isinstance(d, GeneratorDataset) and d.sample_fn and d.sample_fn.pids:
                     generator_process[operator_id] = [d.num_parallel_workers, set(d.sample_fn.pids)]
@@ -2200,6 +2182,35 @@ class BucketBatchByLengthDataset(Dataset):
                                            self.pad_to_bucket_boundary, self.drop_remainder)


+def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1):
+    """
+    Check sufficient shared memory is available for shared memory queues
+    when training in parallel mode.
+    """
+    threshold_ratio = 0.8
+    if platform.system().lower() not in {"windows", "darwin"}:
+        device_num = _get_device_num()
+        # In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards
+        # on the ascend server is 8.
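+        # Rough upper bound rather than a measurement: device_num devices, each with num_worker workers and
+        # num_queues queues, where every queue may hold (queue_size + 2) rows of up to max_rowsize MB in
+        # /dev/shm; the estimate below has to stay under threshold_ratio (80%) of the free shared memory.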
+ if device_num > 1 and context.get_context("device_target") == "Ascend": + device_num = min(device_num, 8) + shm_estimate_usage = device_num * num_worker * num_queues * \ + (queue_size + 2) * max_rowsize * 1024 * 1024 + try: + shm_available = psutil.disk_usage('/dev/shm').free + if shm_estimate_usage >= threshold_ratio * shm_available: + raise RuntimeError( + "Insufficient shared memory available. Required: {}, Available: {}. " + "The required memory can't exceed 80% of the available shared memory, " + "it's recommended to reduce memory usage by following methods:\n" + "1. reduce value of parameter max_rowsize or num_parallel_workers.\n" + "2. reduce prefetch size by set_prefetch_size().\n" + "3. disable shared memory by set_enable_shared_mem()." + .format(shm_estimate_usage, shm_available)) + except FileNotFoundError: + raise RuntimeError("Expected /dev/shm to exist.") + + class BatchDataset(Dataset): """ The result of applying Batch operator to the input dataset. @@ -3125,6 +3136,8 @@ class ConcatDataset(Dataset): tem_list = [-1, -1] self._children_start_end_index_.append(tem_list) dataset_len = self.children_sizes_[index] + + from mindspore.dataset.engine.datasets_user_defined import GeneratorDataset if isinstance(child, GeneratorDataset) and not hasattr(child.source, "__getitem__"): dataset_len = 0 self.children_sizes_[index] = 0 @@ -3421,2856 +3434,6 @@ class RangeDataset(MappableDataset): return self.dataset_size -class FashionMnistDataset(MappableDataset): - """ - A source dataset for reading and parsing the FASHION-MNIST dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read from 60,000 - train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples. - (default=None, will read all samples) - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. 
- ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> fashion_mnist_dataset_dir = "/path/to/fashion_mnist_dataset_directory" - >>> - >>> # Read 3 samples from FASHIONMNIST dataset - >>> dataset = ds.FashionMnistDataset(dataset_dir=fashion_mnist_dataset_dir, num_samples=3) - >>> - >>> # Note: In FASHIONMNIST dataset, each dictionary has keys "image" and "label" - - About Fashion-MNIST dataset: - - Fashion-MNIST is a dataset of Zalando's article images—consisting of a training set of 60,000 examples and - a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. - We intend Fashion-MNIST to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking - machine learning algorithms. It shares the same image size and structure of training and testing splits. - - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── fashionmnist_dataset_dir - ├── t10k-images-idx3-ubyte - ├── t10k-labels-idx1-ubyte - ├── train-images-idx3-ubyte - └── train-labels-idx1-ubyte - - Citation: - - .. code-block:: - - @online{xiao2017/online, - author = {Han Xiao and Kashif Rasul and Roland Vollgraf}, - title = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms}, - date = {2017-08-28}, - year = {2017}, - eprintclass = {cs.LG}, - eprinttype = {arXiv}, - eprint = {cs.LG/1708.07747}, - } - """ - - @check_mnist_cifar_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.FashionMnistNode(self.dataset_dir, self.usage, self.sampler) - - -class ImageFolderDataset(MappableDataset): - """ - A source dataset that reads images from a tree of directories. - All images within one folder have the same label. - - The generated dataset has two columns: :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is of a scalar of uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). 
- sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - extensions (list[str], optional): List of file extensions to be - included in the dataset (default=None). - class_indexing (dict, optional): A str-to-int mapping from folder name to index - (default=None, the folder names will be sorted - alphabetically and each class will be given a - unique index starting from 0). - decode (bool, optional): Decode the images after reading (default=False). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - RuntimeError: If class_indexing is not a dictionary. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise. - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory" - >>> - >>> # 1) Read all samples (image files) in image_folder_dataset_dir with 8 threads - >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir, - ... num_parallel_workers=8) - >>> - >>> # 2) Read all samples (image files) from folder cat and folder dog with label 0 and 1 - >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir, - ... class_indexing={"cat":0, "dog":1}) - >>> - >>> # 3) Read all samples (image files) in image_folder_dataset_dir with extensions .JPEG and .png (case sensitive) - >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir, - ... extensions=[".JPEG", ".png"]) - - About ImageFolderDataset: - - You can construct the following directory structure from your dataset files and read by MindSpore's API. - - .. code-block:: - - . - └── image_folder_dataset_directory - ├── class1 - │ ├── 000000000001.jpg - │ ├── 000000000002.jpg - │ ├── ... - ├── class2 - │ ├── 000000000001.jpg - │ ├── 000000000002.jpg - │ ├── ... - ├── class3 - │ ├── 000000000001.jpg - │ ├── 000000000002.jpg - │ ├── ... - ├── classN - ├── ... 
- """ - - @check_imagefolderdataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, - extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.extensions = replace_none(extensions, []) - self.class_indexing = replace_none(class_indexing, {}) - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing) - - -class IMDBDataset(MappableDataset): - """ - A source dataset for reading and parsing Internet Movie Database (IMDb). - - The generated dataset has two columns: :py:obj:`[text, label]`. - The tensor of column :py:obj:`text` is of the string type. - The tensor of column :py:obj:`label` is of a scalar of uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` - (default=None, will read all samples). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all samples). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - The shape of the test column. - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> imdb_dataset_dir = "/path/to/imdb_dataset_directory" - >>> - >>> # 1) Read all samples (text files) in imdb_dataset_dir with 8 threads - >>> dataset = ds.IMDBDataset(dataset_dir=imdb_dataset_dir, num_parallel_workers=8) - >>> - >>> # 2) Read train samples (text files). - >>> dataset = ds.IMDBDataset(dataset_dir=imdb_dataset_dir, usage="train") - - About IMDBDataset: - - The IMDB dataset contains 50, 000 highly polarized reviews from the Internet Movie Database (IMDB). The data set - was divided into 25 000 comments for training and 25 000 comments for testing, with both the training set and test - set containing 50% positive and 50% negative comments. Train labels and test labels are all lists of 0 and 1, where - 0 stands for negative and 1 for positive. - - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── imdb_dataset_directory - ├── train - │ ├── pos - │ │ ├── 0_9.txt - │ │ ├── 1_7.txt - │ │ ├── ... - │ ├── neg - │ │ ├── 0_3.txt - │ │ ├── 1_1.txt - │ │ ├── ... - ├── test - │ ├── pos - │ │ ├── 0_10.txt - │ │ ├── 1_10.txt - │ │ ├── ... - │ ├── neg - │ │ ├── 0_2.txt - │ │ ├── 1_3.txt - │ │ ├── ... - - Citation: - - .. code-block:: - - @InProceedings{maas-EtAl:2011:ACL-HLT2011, - author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan - and Ng, Andrew Y. and Potts, Christopher}, - title = {Learning Word Vectors for Sentiment Analysis}, - booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: - Human Language Technologies}, - month = {June}, - year = {2011}, - address = {Portland, Oregon, USA}, - publisher = {Association for Computational Linguistics}, - pages = {142--150}, - url = {http://www.aclweb.org/anthology/P11-1015} - } - """ - - @check_imdb_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.IMDBNode(self.dataset_dir, self.usage, self.sampler) - - -class IWSLT2016Dataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses IWSLT2016 datasets. - - The generated dataset has two columns: :py:obj:`[text, translation]`. - The tensor of column :py:obj: `text` is of the string type. - The tensor of column :py:obj: `translation` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Acceptable usages include "train", "valid", "test" and "all" (default=None, all samples). 
- language_pair (sequence, optional): Sequence containing source and target language, supported values are - (`en`, `fr`), ("en", "de"), ("en", "cs"), ("en", "ar"), ("fr", "en"), ("de", "en"), ("cs", "en"), - ("ar", "en") (default=("de", "en")). - valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set - of valid_set type will be read, supported values are "dev2010", "tst2010", "tst2011", "tst2012", "tst2013" - and "tst2014" (default="tst2013"). - test_set (str, optional): A string to identify test set, when usage is test or all, the test set of test_set - type will be read, supported values are "dev2010", "tst2010", "tst2011", "tst2012", "tst2013" and "tst2014" - (default="tst2014"). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> iwslt2016_dataset_dir = "/path/to/iwslt2016_dataset_dir" - >>> dataset = ds.IWSLT2016Dataset(dataset_files=iwslt2016_dataset_dir, usage='all', - ... language_pair=('de', 'en'), valid_set='tst2013', test_set='tst2014') - - About IWSLT2016 dataset: - - IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects - of oral translation. The MT task of the IWSLT evaluation activity constitutes a data set, which can be publicly - obtained through the WIT3 website wit3.fbk.eu. The IWSLT2016 data set includes translations from English to Arabic, - Czech, French, and German, and translations from Arabic, Czech, French, and German to English. - - You can unzip the original IWSLT2016 dataset files into this directory structure and read by MindSpore's API. After - decompression, you also need to decompress the data set to be read in the specified folder. For example, if you want - to read the data set of de-en, you need to unzip the tgz file in the de/en directory, the data set is in the - unzipped folder. - - .. code-block:: - - . - └── iwslt2016_dataset_directory - ├── subeval_files - └── texts - ├── ar - │ └── en - │ └── ar-en - ├── cs - │ └── en - │ └── cs-en - ├── de - │ └── en - │ └── de-en - │ ├── IWSLT16.TED.dev2010.de-en.de.xml - │ ├── train.tags.de-en.de - │ ├── ... 
- ├── en - │ ├── ar - │ │ └── en-ar - │ ├── cs - │ │ └── en-cs - │ ├── de - │ │ └── en-de - │ └── fr - │ └── en-fr - └── fr - └── en - └── fr-en - - Citation: - - .. code-block:: - - @inproceedings{cettoloEtAl:EAMT2012, - Address = {Trento, Italy}, - Author = {Mauro Cettolo and Christian Girardi and Marcello Federico}, - Booktitle = {Proceedings of the 16$^{th}$ Conference of the European Association for Machine Translation - (EAMT)}, - Date = {28-30}, - Month = {May}, - Pages = {261--268}, - Title = {WIT$^3$: Web Inventory of Transcribed and Translated Talks}, - Year = {2012}} - """ - - @check_iwslt2016_dataset - def __init__(self, dataset_dir, usage=None, language_pair=None, valid_set=None, test_set=None, - num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, num_parallel_workers=None, - cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - self.language_pair = replace_none(language_pair, ["de", "en"]) - self.valid_set = replace_none(valid_set, 'tst2013') - self.test_set = replace_none(test_set, 'tst2014') - - def parse(self, children=None): - return cde.IWSLT2016Node(self.dataset_dir, self.usage, self.language_pair, self.valid_set, self.test_set, - self.num_samples, self.shuffle_flag, self.num_shards, self.shard_id) - - -class IWSLT2017Dataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses IWSLT2017 datasets. - - The generated dataset has two columns: :py:obj:`[text, translation]`. - The tensor of column :py:obj:`text` is of the string type. - The tensor of column :py:obj:`translation` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Acceptable usages include "train", "valid", "test" and "all" (default=None, all samples). - language_pair (list, optional): List containing src and tgt language, supported values are ("en", "nl"), - ("en", "de"), ("en", "it"), ("en", "ro"), ("nl", "en"), ("nl", "de"), ("nl", "it"), ("nl", "ro"), - ("de", "en"), ("de", "nl"), ("de", "it"), ("de", "ro"), ("it", "en"), ("it", "nl"), ("it", "de"), - ("it", "ro"), (`ro`, `en`), (`ro`, `nl`), (`ro`, `de`), (`ro`, `it`) (default=(`de`, `en`)). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). 
- - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> iwslt2017_dataset_dir = "/path/to/iwslt207_dataset_dir" - >>> dataset = ds.IWSLT2017Dataset(dataset_files=iwslt2017_dataset_dir, usage='all', language_pair=('de', 'en')) - - About IWSLT2017 dataset: - - IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects - of oral translation. The MT task of the IWSLT evaluation activity constitutes a data set, which can be publicly - obtained through the WIT3 website wit3.fbk.eu. The IWSLT2017 data set involves German, English, Italian, Dutch, and - Romanian. The data set includes translations in any two different languages. - - You can unzip the original IWSLT2017 dataset files into this directory structure and read by MindSpore's API. You - need to decompress the dataset package in texts/DeEnItNlRo/DeEnItNlRo directory to get the DeEnItNlRo-DeEnItNlRo - subdirectory. - - .. code-block:: - - . - └── iwslt2017_dataset_directory - └── DeEnItNlRo - └── DeEnItNlRo - └── DeEnItNlRo-DeEnItNlRo - ├── IWSLT17.TED.dev2010.de-en.de.xml - ├── train.tags.de-en.de - ├── ... - - Citation: - - .. code-block:: - - @inproceedings{cettoloEtAl:EAMT2012, - Address = {Trento, Italy}, - Author = {Mauro Cettolo and Christian Girardi and Marcello Federico}, - Booktitle = {Proceedings of the 16$^{th}$ Conference of the European Association for Machine Translation - (EAMT)}, - Date = {28-30}, - Month = {May}, - Pages = {261--268}, - Title = {WIT$^3$: Web Inventory of Transcribed and Translated Talks}, - Year = {2012}} - """ - - @check_iwslt2017_dataset - def __init__(self, dataset_dir, usage=None, language_pair=None, num_samples=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, num_parallel_workers=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - self.language_pair = replace_none(language_pair, ["de", "en"]) - - def parse(self, children=None): - return cde.IWSLT2017Node(self.dataset_dir, self.usage, self.language_pair, self.num_samples, - self.shuffle_flag, self.num_shards, self.shard_id) - - -class KMnistDataset(MappableDataset): - """ - A source dataset for reading and parsing the KMNIST dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 60,000 - train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples. - (default=None, will read all samples) - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). 
- sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If `dataset_dir` does not contain data files. - RuntimeError: If `num_parallel_workers` exceeds the max thread numbers. - RuntimeError: If `sampler` and `shuffle` are specified at the same time. - RuntimeError: If `sampler` and sharding are specified at the same time. - RuntimeError: If `num_shards` is specified but `shard_id` is None. - RuntimeError: If `shard_id` is specified but `num_shards` is None. - ValueError: If `shard_id` is invalid (out of range [0, `num_shards`]). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> kmnist_dataset_dir = "/path/to/kmnist_dataset_directory" - >>> - >>> # Read 3 samples from KMNIST dataset - >>> dataset = ds.KMnistDataset(dataset_dir=kmnist_dataset_dir, num_samples=3) - >>> - >>> # Note: In kmnist_dataset dataset, each dictionary has keys "image" and "label" - - About KMNIST dataset: - - KMNIST is a dataset, adapted from Kuzushiji Dataset, as a drop-in replacement for MNIST dataset, - which is the most famous dataset in the machine learning community. - - Here is the original KMNIST dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── kmnist_dataset_dir - ├── t10k-images-idx3-ubyte - ├── t10k-labels-idx1-ubyte - ├── train-images-idx3-ubyte - └── train-labels-idx1-ubyte - - Citation: - - .. 
code-block:: - - @online{clanuwat2018deep, - author = {Tarin Clanuwat and Mikel Bober-Irizar and Asanobu Kitamoto and - Alex Lamb and Kazuaki Yamamoto and David Ha}, - title = {Deep Learning for Classical Japanese Literature}, - date = {2018-12-03}, - year = {2018}, - eprintclass = {cs.CV}, - eprinttype = {arXiv}, - eprint = {cs.CV/1812.01718}, - } - """ - - @check_mnist_cifar_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.KMnistNode(self.dataset_dir, self.usage, self.sampler) - - -class MnistDataset(MappableDataset): - """ - A source dataset for reading and parsing the MNIST dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 60,000 - train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples. - (default=None, will read all samples) - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> mnist_dataset_dir = "/path/to/mnist_dataset_directory" - >>> - >>> # Read 3 samples from MNIST dataset - >>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir, num_samples=3) - >>> - >>> # Note: In mnist_dataset dataset, each dictionary has keys "image" and "label" - - About MNIST dataset: - - The MNIST database of handwritten digits has a training set of 60,000 examples, - and a test set of 10,000 examples. It is a subset of a larger set available from - NIST. The digits have been size-normalized and centered in a fixed-size image. - - Here is the original MNIST dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── mnist_dataset_dir - ├── t10k-images-idx3-ubyte - ├── t10k-labels-idx1-ubyte - ├── train-images-idx3-ubyte - └── train-labels-idx1-ubyte - - Citation: - - .. code-block:: - - @article{lecun2010mnist, - title = {MNIST handwritten digit database}, - author = {LeCun, Yann and Cortes, Corinna and Burges, CJ}, - journal = {ATT Labs [Online]}, - volume = {2}, - year = {2010}, - howpublished = {http://yann.lecun.com/exdb/mnist} - } - """ - - @check_mnist_cifar_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.MnistNode(self.dataset_dir, self.usage, self.sampler) - - -class PennTreebankDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses PennTreebank datasets. - - The generated dataset has one column :py:obj:`[text]`. - The tensor of column :py:obj:`text` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Acceptable usages include `train`, `test`, 'valid' and `all`. - 'train' will read from 42,068 train samples of string type, - 'test' will read from 3,370 test samples of string type, - 'valid' will read from 3,761 test samples of string type, - 'all' will read from all 49,199 samples of string type (default=None, all samples). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. 
- - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, 'num_samples' reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Examples: - >>> penn_treebank_dataset_dir = "/path/to/penn_treebank_dataset_directory" - >>> dataset = ds.PennTreebankDataset(dataset_dir=penn_treebank_dataset_dir, usage='all') - - About PennTreebank dataset: - - Penn Treebank (PTB) dataset, is widely used in machine learning for NLP (Natural Language Processing) - research. Word-level PTB does not contain capital letters, numbers, and punctuations, and the vocabulary - is capped at 10k unique words, which is relatively small in comparison to most modern datasets which - can result in a larger number of out of vocabulary tokens. - - Here is the original PennTreebank dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── PennTreebank_dataset_dir - ├── ptb.test.txt - ├── ptb.train.txt - └── ptb.valid.txt - - Citation: - - .. code-block:: - - @techreport{Santorini1990, - added-at = {2014-03-26T23:25:56.000+0100}, - author = {Santorini, Beatrice}, - biburl = {https://www.bibsonomy.org/bibtex/234cdf6ddadd89376090e7dada2fc18ec/butonic}, - file = {:Santorini - Penn Treebank tag definitions.pdf:PDF}, - institution = {Department of Computer and Information Science, University of Pennsylvania}, - interhash = {818e72efd9e4b5fae3e51e88848100a0}, - intrahash = {34cdf6ddadd89376090e7dada2fc18ec}, - keywords = {dis pos tagging treebank}, - number = {MS-CIS-90-47}, - timestamp = {2014-03-26T23:25:56.000+0100}, - title = {Part-of-speech tagging guidelines for the {P}enn {T}reebank {P}roject}, - url = {ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz}, - year = 1990 - } - """ - - @check_penn_treebank_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.PennTreebankNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class PhotoTourDataset(MappableDataset): - """ - A source dataset for reading and parsing the PhotoTour dataset. - - The generated dataset with different usage has different output columns. - If train, the generated dataset has one column :py:obj:`[image]`, - else three columns :py:obj:`[image1, image2, matches]`. - The tensor of column :py:obj:`image`, :py:obj:`image1` and :py:obj:`image2` is of the uint8 type. - The tensor of column :py:obj:`matches` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - name (str): Name of the dataset to load, - should be one of 'notredame', 'yosemite', 'liberty', 'notredame_harris', - 'yosemite_harris' or 'liberty_harris'. 
- usage (str, optional): Usage of the dataset, can be `train` or `test` (Default=None, will be set to 'train'). - When usage is `train`, number of samples for each `name` is - {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092, 'liberty_harris': 379587, - 'yosemite_harris': 450912, 'notredame_harris': 325295}. - When usage is `test`, will read 100,000 samples for testing. - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir is not exist. - ValueError: If usage is not in ["train", "test"]. - ValueError: If name is not in ["notredame", "yosemite", "liberty", - "notredame_harris", "yosemite_harris", "liberty_harris"]. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table - below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 64 64 1 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> # Read 3 samples from PhotoTour dataset. - >>> dataset = ds.PhotoTourDataset(dataset_dir="/path/to/photo_tour_dataset_directory", - ... name='liberty', usage='train', num_samples=3) - >>> - >>> # In PhotoTourDataset dataset, if usage is 'train', each dictionary has key "image", - >>> # else has keys "image1" "image2" and "matches". - - About PhotoTour dataset: - - The data is taken from Photo Tourism reconstructions from Trevi Fountain (Rome), Notre Dame (Paris) and Half - Dome (Yosemite). Each dataset consists of a series of corresponding patches, which are obtained by projecting - 3D points from Photo Tourism reconstructions back into the original images. - - The dataset consists of 1024 x 1024 bitmap (.bmp) images, each containing a 16 x 16 array of image patches. 
- Each patch is sampled as 64 x 64 grayscale, with a canonical scale and orientation. For details of how the scale - and orientation is established, please see the paper. An associated metadata file info.txt contains the match - information. Each row of info.txt corresponds to a separate patch, with the patches ordered from left to right and - top to bottom in each bitmap image. The first number on each row of info.txt is the 3D point ID from which that - patch was sampled -- patches with the same 3D point ID are projected from the same 3D point (into different images). - The second number in info.txt corresponds to the image from which the patch was sampled, and is not used at present. - - You can unzip the original PhotoTour dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── photo_tour_dataset_directory - ├── liberty/ - │ ├── info.txt // two columns: 3D_point_ID, unused - │ ├── m50_100000_100000_0.txt // seven columns: patch_ID1, 3D_point_ID1, unused1, - │ │ // patch_ID2, 3D_point_ID2, unused2, unused3 - │ ├── patches0000.bmp // 1024*1024 pixels, with 16 * 16 patches. - │ ├── patches0001.bmp - │ ├── ... - ├── yosemite/ - │ ├── ... - ├── notredame/ - │ ├── ... - ├── liberty_harris/ - │ ├── ... - ├── yosemite_harris/ - │ ├── ... - ├── notredame_harris/ - │ ├── ... - - Citation: - - .. code-block:: - - @INPROCEEDINGS{4269996, - author={Winder, Simon A. J. and Brown, Matthew}, - booktitle={2007 IEEE Conference on Computer Vision and Pattern Recognition}, - title={Learning Local Image Descriptors}, - year={2007}, - volume={}, - number={}, - pages={1-8}, - doi={10.1109/CVPR.2007.382971} - } - """ - - @check_photo_tour_dataset - def __init__(self, dataset_dir, name, usage=None, num_samples=None, num_parallel_workers=None, - shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.name = name - self.usage = replace_none(usage, "train") - - def parse(self, children=None): - return cde.PhotoTourNode(self.dataset_dir, self.name, self.usage, self.sampler) - - -class Places365Dataset(MappableDataset): - """ - A source dataset for reading and parsing the Places365 dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train-standard`, `train-challenge` or `val` - (default=None, will be set to 'train-standard'). - small (bool, optional): Use 256 * 256 images (True) or high resolution images (False) (default=False). - decode (bool, optional): Decode the images after reading (default=True). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). 
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - ValueError: If usage is not in ["train-standard", "train-challenge", "val"]. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> place365_dataset_dir = "/path/to/place365_dataset_directory" - >>> - >>> # Read 3 samples from Places365 dataset - >>> dataset = ds.Places365Dataset(dataset_dir=place365_dataset_dir, usage='train-standard', - ... small=True, decode=True, num_samples=3) - >>> - >>> # In places365 dataset, each dictionary has keys "image" and "label". - - About Places365 dataset: - - Convolutional neural networks (CNNs) trained on the Places2 Database can be used for scene recognition as well as - generic deep scene features for visual recognition. - - The author releases the data of Places365-Standard and the data of Places365-Challenge to the public. - Places365-Standard is the core set of Places2 Database, which has been used to train the Places365-CNNs. The author - will add other kinds of annotation on the Places365-Standard in the future. Places365-Challenge is the competition - set of Places2 Database, which has 6.2 million extra images compared to the Places365-Standard. - The Places365-Challenge will be used for the Places Challenge 2016. - - You can unzip the original Places365 dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └─├── categories_places365.txt - ├── places365_train-standard.txt - ├── places365_train-challenge.txt - ├── val_large/ - │ ├── Places365_val_00000001.jpg - │ ├── Places365_val_00000002.jpg - │ ├── Places365_val_00000003.jpg - │ ├── ... - ├── val_256/ - │ ├── ... - ├── data_large_standard/ - │ ├── ... - ├── data_256_standard/ - │ ├── ... - ├── data_large_challenge/ - │ ├── ... - ├── data_256_challenge / - │ ├── ... - - Citation: - - .. 
code-block:: - - article{zhou2017places, - title={Places: A 10 million Image Database for Scene Recognition}, - author={Zhou, Bolei and Lapedriza, Agata and Khosla, Aditya and Oliva, Aude and Torralba, Antonio}, - journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, - year={2017}, - publisher={IEEE} - } - """ - - @check_places365_dataset - def __init__(self, dataset_dir, usage=None, small=True, decode=False, num_samples=None, num_parallel_workers=None, - shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = os.path.abspath(dataset_dir) - self.usage = replace_none(usage, "train-standard") - self.small = small - self.decode = decode - - def parse(self, children=None): - return cde.Places365Node(self.dataset_dir, self.usage, self.small, self.decode, self.sampler) - - -class QMnistDataset(MappableDataset): - """ - A source dataset for reading and parsing the QMNIST dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar when `compat` is True else a tensor both of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test`, `test10k`, `test50k`, `nist` - or `all` (default=None, will read all samples). - compat (bool, optional): Whether the label for each example is class number (compat=True) or the full QMNIST - information (compat=False) (default=True). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> qmnist_dataset_dir = "/path/to/qmnist_dataset_directory" - >>> - >>> # Read 3 samples from QMNIST train dataset - >>> dataset = ds.QMnistDataset(dataset_dir=qmnist_dataset_dir, num_samples=3) - >>> - >>> # Note: In QMNIST dataset, each dictionary has keys "image" and "label" - - About QMNIST dataset: - - The QMNIST dataset was generated from the original data found in the NIST Special Database 19 with the goal to - match the MNIST preprocessing as closely as possible. - Through an iterative process, researchers tried to generate an additional 50k images of MNIST-like data. - They started with a reconstruction process given in the paper and used the Hungarian algorithm to find the best - matches between the original MNIST samples and their reconstructed samples. - - Here is the original QMNIST dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── qmnist_dataset_dir - ├── qmnist-train-images-idx3-ubyte - ├── qmnist-train-labels-idx2-int - ├── qmnist-test-images-idx3-ubyte - ├── qmnist-test-labels-idx2-int - ├── xnist-images-idx3-ubyte - └── xnist-labels-idx2-int - - Citation: - - .. code-block:: - - @incollection{qmnist-2019, - title = "Cold Case: The Lost MNIST Digits", - author = "Chhavi Yadav and L\'{e}on Bottou",\ - booktitle = {Advances in Neural Information Processing Systems 32}, - year = {2019}, - publisher = {Curran Associates, Inc.}, - } - """ - - @check_qmnist_dataset - def __init__(self, dataset_dir, usage=None, compat=True, num_samples=None, num_parallel_workers=None, - shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - self.compat = compat - - def parse(self, children=None): - return cde.QMnistNode(self.dataset_dir, self.usage, self.compat, self.sampler) - - -class MindDataset(MappableDataset, TextBaseDataset): - """ - A source dataset for reading and parsing MindRecord dataset. - - The columns of generated dataset depend on the source MindRecord files. - - Args: - dataset_files (Union[str, list[str]]): If dataset_file is a str, it represents for - a file name of one component of a mindrecord source, other files with identical source - in the same path will be found and loaded automatically. If dataset_file is a list, - it represents for a list of dataset files to be read directly. - columns_list (list[str], optional): List of columns to be read (default=None). - num_parallel_workers (int, optional): The number of readers (default=None). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=None, performs global shuffle). 
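The `compat` flag described above only changes the shape of the `label` column: with `compat=True` it is the plain class id, with `compat=False` it carries the full QMNIST metadata as a tensor. A hedged sketch (the directory path is a placeholder):

.. code-block:: python

    import mindspore.dataset as ds

    qmnist_dir = "/path/to/qmnist_dataset_directory"  # hypothetical path

    # Plain class ids, drop-in compatible with MnistDataset.
    compat_ds = ds.QMnistDataset(dataset_dir=qmnist_dir, usage="train", compat=True)

    # Full QMNIST label information as a tensor instead of a scalar.
    full_ds = ds.QMnistDataset(dataset_dir=qmnist_dir, usage="train", compat=False)

    for row in full_ds.create_dict_iterator(output_numpy=True):
        print(row["label"].shape)  # extended label vector rather than a scalar
        break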
- If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are three levels of shuffling: - - - Shuffle.GLOBAL: Global shuffle of all rows of data in dataset. - - - Shuffle.FILES: Shuffle the file sequence but keep the order of data within each file. - - - Shuffle.INFILE: Keep the file sequence the same but shuffle the data within each file. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, 'num_samples' reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, sampler is exclusive - with shuffle and block_reader). Support list: SubsetRandomSampler, - PkSampler, RandomSampler, SequentialSampler, DistributedSampler. - padded_sample (dict, optional): Samples will be appended to dataset, where - keys are the same as column_list. - num_padded (int, optional): Number of padding samples. Dataset size - plus num_padded should be divisible by num_shards. - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, all samples). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_files are not valid or do not exist. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> mind_dataset_dir = ["/path/to/mind_dataset_file"] # contains 1 or multiple MindRecord files - >>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir) - """ - - def parse(self, children=None): - return cde.MindDataNode(self.dataset_files, self.columns_list, self.sampler, self.new_padded_sample, - self.num_padded, shuffle_to_shuffle_mode(self.shuffle_option)) - - @check_minddataset - def __init__(self, dataset_files, columns_list=None, num_parallel_workers=None, shuffle=None, num_shards=None, - shard_id=None, sampler=None, padded_sample=None, num_padded=None, num_samples=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle_to_bool(shuffle), num_shards=num_shards, shard_id=shard_id, cache=cache) - if shuffle is not None and not isinstance(shuffle, (bool, Shuffle)): - raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.GLOBAL' or " - "'Shuffle.FILES' or 'Shuffle.INFILE'.") - if num_samples and shuffle in (Shuffle.FILES, Shuffle.INFILE): - raise ValueError("'Shuffle.FILES' or 'Shuffle.INFILE' and 'num_samples' " - "cannot be specified at the same time.") - self.shuffle_option = shuffle - if isinstance(dataset_files, list): - self.load_dataset = False - else: - self.load_dataset = True - self.dataset_files = dataset_files - self.columns_list = replace_none(columns_list, []) - - if shuffle is False: - logger.warning("WARN: global shuffle is not used.") - - if sampler is not None: - if isinstance(sampler, ( - samplers.SubsetRandomSampler, samplers.SubsetSampler, samplers.PKSampler, - samplers.DistributedSampler, - samplers.RandomSampler, samplers.SequentialSampler)) is False: - raise ValueError("The sampler is not supported yet.") - - self.padded_sample = padded_sample - self.num_padded = replace_none(num_padded, 0) - - self.new_padded_sample = {} - if padded_sample: - for k, v in padded_sample.items(): - if isinstance(v, np.ndarray): - self.new_padded_sample[k] = v.tobytes() - else: - self.new_padded_sample[k] = v - - -def _iter_fn(dataset, num_samples): - """ - Generator function wrapper for iterable dataset. - """ - if num_samples is not None and num_samples != 0: - ds_iter = iter(dataset) - for _ in range(num_samples): - try: - val = next(ds_iter) - except StopIteration: - return - # convert output tensors to ndarrays - yield _convert_row(val) - else: - for val in dataset: - # convert output tensors to ndarrays - yield _convert_row(val) - - -def _generator_fn(generator, num_samples): - """ - Generator function wrapper for generator function dataset. - """ - if num_samples is not None and num_samples != 0: - gen_iter = generator() - for _ in range(num_samples): - try: - val = next(gen_iter) - except StopIteration: - return - yield val - else: - gen_iter = generator() - for val in gen_iter: - yield val - - -def _cpp_sampler_fn(sample_ids, dataset): - """ - Generator function wrapper for mappable dataset with cpp sampler. 
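The `_iter_fn` and `_generator_fn` wrappers above share one pattern: re-create the underlying iterator, then stop after `num_samples` rows when a cap is given. A standalone sketch of that pattern, independent of the MindSpore internals:

.. code-block:: python

    from itertools import islice

    def capped_rows(make_iter, num_samples=None):
        """Yield rows from make_iter(), stopping early when num_samples is set."""
        rows = make_iter()
        if num_samples:  # None or 0 means "read everything"
            rows = islice(rows, num_samples)
        yield from rows

    def source():
        for i in range(100):
            yield (i,)

    print(list(capped_rows(source, num_samples=3)))  # [(0,), (1,), (2,)]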
- """ - if not isinstance(sample_ids, np.ndarray): - raise RuntimeError("Sample IDs are not in a numpy array.") - if sample_ids.size == 0: - raise RuntimeError("Sampler passed an empty sample IDs list.") - - for i in sample_ids: - val = dataset[i] - # convert output tensors to ndarrays - yield _convert_row(val) - - -def _cpp_sampler_fn_mp(sample_ids, sample_fn): - """ - Multiprocessing generator function wrapper for mappable dataset with cpp sampler. - """ - if not isinstance(sample_ids, np.ndarray): - raise RuntimeError("Sample IDs are not in a numpy array.") - if sample_ids.size == 0: - raise RuntimeError("Sampler passed an empty sample IDs list.") - - return sample_fn.process(sample_ids) - - -def _fill_worker_indices(workers, indices, idx): - """ - Worker index queue filler, fill worker index queue in round robin order. - """ - num_worker = len(workers) - while idx < len(indices): - try: - workers[idx % num_worker].put(indices[idx]) - idx += 1 - except queue.Full: - break - return idx - - -def _check_shm_usage(num_worker, queue_size, max_rowsize, num_queues=1): - """ - Check sufficient shared memory is available for shared memory queues - when training in parallel mode. - """ - threshold_ratio = 0.8 - if platform.system().lower() not in {"windows", "darwin"}: - device_num = _get_device_num() - # In the cluster, _get_device_num indicates the number of the entire cluster. The maximum number of cards - # on the ascend server is 8. - if device_num > 1 and context.get_context("device_target") == "Ascend": - device_num = min(device_num, 8) - shm_estimate_usage = device_num * num_worker * num_queues * \ - (queue_size + 2) * max_rowsize * 1024 * 1024 - try: - shm_available = psutil.disk_usage('/dev/shm').free - if shm_estimate_usage >= threshold_ratio * shm_available: - raise RuntimeError( - "Insufficient shared memory available. Required: {}, Available: {}. " - "The required memory can't exceed 80% of the available shared memory, " - "it's recommended to reduce memory usage by following methods:\n" - "1. reduce value of parameter max_rowsize or num_parallel_workers.\n" - "2. reduce prefetch size by set_prefetch_size().\n" - "3. disable shared memory by set_enable_shared_mem()." - .format(shm_estimate_usage, shm_available)) - except FileNotFoundError: - raise RuntimeError("Expected /dev/shm to exist.") - - -def _convert_row(row): - """ - Convert Op return value to numpy - """ - value = [] - if isinstance(row, dict): - raise ValueError("Return value in user defined python function should be numpy array, but got dict.") - - # convert each column in row into numpy array - for x in row: - if isinstance(x, bytes): # got image bytes from a file - value.append(np.frombuffer(x, np.uint8)) - elif isinstance(x, Tensor): # got mindspore.Tensor - value.append(x.asnumpy()) - elif isinstance(x, dict): - raise ValueError("Return value in user defined python function should be numpy array, but got dict.") - else: - value.append(np.array(x, copy=False)) - return tuple(value) - - -class SamplerFn: - """ - Multiprocessing or multithread generator function wrapper master process. 
- """ - - def __init__(self, dataset, num_worker, multi_process, max_rowsize): - self.workers = [] - self.num_worker = num_worker - self.multi_process = multi_process - self.need_join = False - self.ppid = os.getpid() - self.pids = [] - self.check_interval = 300 # the interval of check queue's size - self._final_join = True - - # Event for end of epoch - if multi_process is True: - try: - self.eof = multiprocessing.Event() - except Exception: - raise RuntimeError("Init multiprocessing.Event() failed, This might be caused by insufficient shm," - + " and the recommended shm size is at least 5 GB.") - else: - self.eof = threading.Event() - # Create workers - - # get default queue size and adjust queuesize per worker if there are large # workers - queue_size = get_prefetch_size() - queue_size = min(queue_size, queue_size * 4 // num_worker) - queue_size = max(2, queue_size) - - if multi_process and get_enable_shared_mem(): - _check_shm_usage(num_worker, queue_size, max_rowsize) - for _ in range(num_worker): - if multi_process is True: - try: - worker = _GeneratorWorkerMp(dataset, self.eof, max_rowsize, queue_size) - except Exception: - raise RuntimeError("Init multiprocessing.Queue() failed, This might be caused by insufficient shm," - + " and the recommended shm size is at least 5 GB.") - worker.daemon = True - # When multi processes fork a subprocess, the lock of the main process is copied to the subprocess, - # which may cause deadlock. Therefore, the subprocess startup is performed in che initialization phase. - # In this phase, the main process is not locked. - worker.start() - self.pids.append(worker.pid) - self.need_join = True - else: - worker = _GeneratorWorkerMt(dataset, self.eof) - worker.daemon = True - self.workers.append(worker) - if multi_process is True and platform.system().lower() != 'windows': - self.eot = threading.Event() - self.watch_dog = threading.Thread(target=_watch_dog, args=(self.eot, self.workers)) - self.watch_dog.daemon = True - self.watch_dog.start() - - if self._final_join is True: - self._jointhread = Finalize( - self.watch_dog, self._finalize_join, - args=(weakref.ref(self.watch_dog), self.eot), - exitpriority=-5 - ) - - def process(self, indices): - """ - The main process, start the child process or child thread, and fill the index queue. - Get the result and return. - """ - for w in self.workers: - # Check whether the queue of the subprocess is empty. - if not w.queue_empty(): - raise Exception("The queue of the subprocess is not empty.") - # Start all workers - if not w.is_alive(): - w.start() - - # Fill initial index queues - idx_cursor = 0 - idx_cursor = _fill_worker_indices(self.workers, indices, idx_cursor) - - # Fetch results - for i in range(len(indices)): - if self.eof.is_set(): - self._stop_subprocess() - return - if self.multi_process is True and not psutil.pid_exists(self.workers[i % self.num_worker].pid): - self._stop_subprocess() - return - # Fetch result and put index - try: - # To avoid get timeout from queue, check the res_queue size. 
- start_time = int(time.time()) - wait_count = 1 - while self.workers[i % self.num_worker].res_queue.empty(): - time.sleep(0.1) - cost_time = int(time.time()) - start_time - if cost_time / self.check_interval >= wait_count: - wait_count += 1 - logger.warning("It has been waiting for " + str(cost_time) + "s because the multi " - "thread/process of the generator generates data had been hung by gil lock.") - - result = self.workers[i % self.num_worker].get() - if isinstance(result, ExceptionHandler): - result.reraise() - except queue.Empty: - self._stop_subprocess() - raise Exception("Generator worker process timeout.") - except KeyboardInterrupt: - self._stop_subprocess() - raise Exception("Generator worker receives KeyboardInterrupt.") - if self.eof.is_set(): - self._stop_subprocess() - return - if idx_cursor < len(indices): - idx_cursor = _fill_worker_indices(self.workers, indices, idx_cursor) - yield _convert_row(result) - - def _stop_subprocess(self): - """Only the main process can call join.""" - if self.need_join is True and self.ppid == os.getpid(): - self.eof.set() - self.need_join = False - for w in self.workers: - if self.multi_process is True and hasattr(w, '_closed') and w._closed is False: # pylint: disable=W0212 - w.join() - self._abort_watchdog() - - def _abort_watchdog(self): - if hasattr(self, 'eot') and self.eot is not None and not self.eot.is_set(): - self.eot.set() - - @classmethod - def _finalize_join(cls, twr, eot): - thread = twr() - if thread is not None: - if eot is not None and not eot.is_set(): - eot.set() - thread.join() - - def __del__(self): - self._stop_subprocess() - - -def _subprocess_handle(eof, signum, frame): - threading.Thread(target=eof.set()).start() - - -def _generator_worker_loop(dataset, idx_queue, result_queue, eof, is_multiprocessing): - """ - Multithread or multiprocess generator worker process loop. - """ - if is_multiprocessing: - signal.signal(signal.SIGTERM, partial(_subprocess_handle, eof)) - while True: - # Fetch index, block - try: - idx = idx_queue.get(timeout=1) - except KeyboardInterrupt: - if is_multiprocessing: - eof.set() - idx_queue.cancel_join_thread() - result_queue.cancel_join_thread() - raise Exception("Generator worker receives KeyboardInterrupt.") - except queue.Empty: - if eof.is_set(): - if is_multiprocessing: - idx_queue.cancel_join_thread() - result_queue.cancel_join_thread() - return - # If end-of-file (eof) is not set, continue to get data from idx_queue - continue - if idx is None: - # When the queue is out of scope from master process, a None item can be fetched from the queue. - # Upon receiving None, worker process should check if eof is set. 
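At its core, the SamplerFn / worker machinery above is a pair of bounded queues per worker: indices go in, rows come back, and a sentinel (or the eof event) shuts the loop down. A heavily reduced multiprocessing sketch of that shape; the names here are illustrative, not the classes above:

.. code-block:: python

    import multiprocessing as mp

    def worker_loop(dataset, idx_queue, res_queue):
        while True:
            idx = idx_queue.get()
            if idx is None:            # sentinel: no more work
                break
            res_queue.put(dataset[idx])

    if __name__ == "__main__":
        data = [i * i for i in range(10)]   # stand-in for a random-access dataset
        idx_q, res_q = mp.Queue(4), mp.Queue(4)
        worker = mp.Process(target=worker_loop, args=(data, idx_q, res_q), daemon=True)
        worker.start()
        for i in (3, 1, 4):
            idx_q.put(i)
        print([res_q.get() for _ in range(3)])  # [9, 1, 16]
        idx_q.put(None)
        worker.join()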
- if not eof.is_set(): - raise Exception("") - return - if eof.is_set(): - if is_multiprocessing: - idx_queue.cancel_join_thread() - result_queue.cancel_join_thread() - return - # Fetch data, any exception from __getitem__ will terminate worker and timeout master process - try: - result = dataset[idx] - except Exception: - result = ExceptionHandler(where="in GeneratorDataset worker process") - # Send data, block - while True: - try: - result_queue.put(result, timeout=5) - except KeyboardInterrupt: - if is_multiprocessing: - eof.set() - idx_queue.cancel_join_thread() - result_queue.cancel_join_thread() - raise Exception("Generator worker receives KeyboardInterrupt.") - except queue.Full: - if eof.is_set(): - if is_multiprocessing: - idx_queue.cancel_join_thread() - result_queue.cancel_join_thread() - return - # If eof is not set, continue to put data to result_queue - continue - break - del result, idx - - -class _GeneratorWorkerMt(threading.Thread): - """ - Worker process for multi-thread Generator. - """ - - def __init__(self, dataset, eof): - self.idx_queue = queue.Queue(16) - self.res_queue = queue.Queue(16) - super().__init__(target=_generator_worker_loop, args=(dataset, self.idx_queue, self.res_queue, eof, False)) - - def put(self, item): - """ - Put function for worker index queue. Never block. Raise queue.Full on failure. - """ - self.idx_queue.put_nowait(item) - - def get(self): - """ - Get function for worker result queue. Block with timeout. - """ - return self.res_queue.get(timeout=30) - - def queue_empty(self): - if not self.idx_queue.empty(): - logger.warning("idx_queue is not empty") - return False - if not self.res_queue.empty(): - logger.warning("res_queue is not empty") - return False - return True - - -class _GeneratorWorkerMp(multiprocessing.Process): - """ - Worker process for multiprocess Generator. - """ - - def __init__(self, dataset, eof, max_rowsize, queue_size): - self.idx_queue = multiprocessing.Queue(queue_size) - if get_enable_shared_mem(): - self.res_queue = _SharedQueue(queue_size, max_rowsize=max_rowsize) - else: - self.res_queue = multiprocessing.Queue(queue_size) - self.idx_queue._joincancelled = True # pylint: disable=W0212 - self.res_queue._joincancelled = True # pylint: disable=W0212 - super().__init__(target=_generator_worker_loop, args=(dataset, self.idx_queue, self.res_queue, eof, True)) - - def put(self, item): - """ - Put function for worker index queue. Never block. Raise queue.Full on failure. - """ - self.idx_queue.put_nowait(item) - - def get(self): - """ - Get function for worker result queue. Block with timeout. - """ - # Relax 10s to 30s, since it sometimes will cause "Generator worker process timeout" - # when we run too many iterators with infinite epoch(num_epoch=-1) - return self.res_queue.get(timeout=30) - - def queue_empty(self): - if not self.idx_queue.empty(): - logger.warning("idx_queue is not empty.") - return False - if not self.res_queue.empty(): - logger.warning("res_queue is not empty.") - return False - return True - - -class GeneratorDataset(MappableDataset, TextBaseDataset): - """ - A source dataset that generates data from Python by invoking Python data source each epoch. - - The column names and column types of generated dataset depend on Python data defined by users. - - Args: - source (Union[Callable, Iterable, Random Accessible]): - A generator callable object, an iterable Python object or a random accessible Python object. 
- Callable source is required to return a tuple of NumPy arrays as a row of the dataset on source().next(). - Iterable source is required to return a tuple of NumPy arrays as a row of the dataset on - iter(source).next(). - Random accessible source is required to return a tuple of NumPy arrays as a row of the dataset on - source[idx]. - column_names (Union[str, list[str]], optional): List of column names of the dataset (default=None). Users are - required to provide either column_names or schema. - column_types (list[mindspore.dtype], optional): List of column data types of the dataset (default=None). - If provided, sanity check will be performed on generator output. - schema (Union[Schema, str], optional): Path to the JSON schema file or schema object (default=None). Users are - required to provide either column_names or schema. If both are provided, schema will be used. - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. - (default=None, expected order behavior shown in the table). - sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible - input is required (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - Random accessible input is required. When this argument is specified, `num_samples` reflects the maximum - sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only - when num_shards is also specified. Random accessible input is required. - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This - option could be beneficial if the Python operation is computational heavy (default=True). - max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy - data between processes. This is only used if python_multiprocessing is set to True (default 6 MB). - - Raises: - RuntimeError: If source raises an exception during execution. - RuntimeError: If len of column_names does not match output len of source. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - Input `source` accept user defined Python function(PyFuncs), Do not add network computing operators from - mindspore.nn and mindspore.ops or others into this `source`. - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> import numpy as np - >>> - >>> # 1) Multidimensional generator function as callable input. - >>> def generator_multidimensional(): - ... for i in range(64): - ... yield (np.array([[i, i + 1], [i + 2, i + 3]]),) - >>> - >>> dataset = ds.GeneratorDataset(source=generator_multidimensional, column_names=["multi_dimensional_data"]) - >>> - >>> # 2) Multi-column generator function as callable input. - >>> def generator_multi_column(): - ... for i in range(64): - ... yield np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]) - >>> - >>> dataset = ds.GeneratorDataset(source=generator_multi_column, column_names=["col1", "col2"]) - >>> - >>> # 3) Iterable dataset as iterable input. - >>> class MyIterable: - ... def __init__(self): - ... self._index = 0 - ... self._data = np.random.sample((5, 2)) - ... self._label = np.random.sample((5, 1)) - ... - ... def __next__(self): - ... if self._index >= len(self._data): - ... raise StopIteration - ... else: - ... item = (self._data[self._index], self._label[self._index]) - ... self._index += 1 - ... return item - ... - ... def __iter__(self): - ... self._index = 0 - ... return self - ... - ... def __len__(self): - ... return len(self._data) - >>> - >>> dataset = ds.GeneratorDataset(source=MyIterable(), column_names=["data", "label"]) - >>> - >>> # 4) Random accessible dataset as random accessible input. - >>> class MyAccessible: - ... def __init__(self): - ... self._data = np.random.sample((5, 2)) - ... self._label = np.random.sample((5, 1)) - ... - ... def __getitem__(self, index): - ... return self._data[index], self._label[index] - ... - ... def __len__(self): - ... return len(self._data) - >>> - >>> dataset = ds.GeneratorDataset(source=MyAccessible(), column_names=["data", "label"]) - >>> - >>> # list, dict, tuple of Python is also random accessible - >>> dataset = ds.GeneratorDataset(source=[(np.array(0),), (np.array(1),), (np.array(2),)], column_names=["col"]) - """ - - @check_generatordataset - def __init__(self, source, column_names=None, column_types=None, schema=None, num_samples=None, - num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None, - python_multiprocessing=True, max_rowsize=6): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id) - if isinstance(source, builtins.zip): - # Although zip is iteratable, it does not have the feature of repeated iteration, so pass it to the array. - self.source = [item for item in source] - else: - self.source = source - self.prepared_source = None # source to be sent to C++ - if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True: - self.num_parallel_workers = 1 - logger.warning( - "Input 'source' of 'GeneratorDataset' includes network computing operators like in mindspore.nn, " - "mindspore.ops, mindspore.numpy module and etc, which do not support multi-thread compiling, recommend" - " to replace it with python implemented operator like numpy etc. 
Here decrease 'num_parallel_workers' " - "into 1.") - - self.python_multiprocessing = python_multiprocessing - - self.column_names = to_list(column_names) - - if column_types is not None: - self.column_types = mstypelist_to_detypelist(column_types) - else: - self.column_types = [] - - self.schema = schema - if schema is not None: - self.schema = schema - if not isinstance(schema, Schema): - self.schema = Schema(schema) - # Move get dataset_size by len from parse to here, because self.source will - # lose attribution of '__len__' after deepcopy. - self.source_len = -1 # unknown - if hasattr(self.source, "__len__"): - self.source_len = len(self.source) - - self.max_rowsize = max_rowsize - self.sample_fn = None - - def __deepcopy__(self, memodict): - if id(self) in memodict: - return memodict[id(self)] - new_op = self.__safe_deepcopy__(memodict, exclude=("source", "__transfer_dataset__")) - - sample_fn = None - if new_op.sampler is not None and hasattr(self.source, "__getitem__"): - # The reason why there is a try catch here is because when the new op is being constructed with shared - # memory enabled, there will be an exception thrown if there is not enough shared memory available - if self.source_len == -1: - raise RuntimeError("Attempt to construct a random access dataset, '__len__' method is required!") - try: - if new_op.num_parallel_workers > 1: - self.__validate_memory_usage() - - sample_fn = SamplerFn(self.source, new_op.num_parallel_workers, self.python_multiprocessing, - self.max_rowsize) - new_op.prepared_source = (lambda sample_ids: _cpp_sampler_fn_mp(sample_ids, sample_fn)) - else: - new_op.prepared_source = (lambda sample_ids: _cpp_sampler_fn(sample_ids, self.source)) - new_op.sample_fn = sample_fn - except RuntimeError as e: - raise Exception(str(e)) - else: - try: - new_op.sampler = None - new_op.sample_fn = sample_fn - new_op.source_len = min(new_op.source_len, - new_op.num_samples) if new_op.num_samples != 0 else new_op.source_len - iter(self.source) - except TypeError: - # Use generator function if input callable - new_op.prepared_source = (lambda: _generator_fn(self.source, new_op.num_samples)) - else: - # Use iterator function if input is iterable - # Random accessible input is also iterable - new_op.prepared_source = (lambda: _iter_fn(self.source, new_op.num_samples)) - - return new_op - - def is_shuffled(self): - return self.sampler.is_shuffled() - - def is_sharded(self): - return self.sampler.is_sharded() - - def parse(self, children=None): - if self.schema is None: - return cde.GeneratorNode(self.prepared_source, self.column_names, self.column_types, self.source_len, - self.sampler, self.num_parallel_workers) - schema = self.schema - if isinstance(schema, Schema): - schema = self.schema.cpp_schema - return cde.GeneratorNode(self.prepared_source, schema, self.source_len, self.sampler, - self.num_parallel_workers) - - def __validate_memory_usage(self): - """ - Check memory usage when mulit-processing mode, when 85% prompt warning and 100% raise error. 
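The `builtins.zip` special case in `GeneratorDataset.__init__` above exists because a zip object is a one-shot iterator: after the first pass it is exhausted and yields nothing on the next epoch, so it has to be materialised into a list up front. A two-line illustration:

.. code-block:: python

    z = zip([0, 1, 2], [10, 11, 12])
    print(list(z))  # [(0, 10), (1, 11), (2, 12)], this consumes the iterator
    print(list(z))  # prints [] because a second pass yields nothing, hence the list() conversion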
- """ - if self.python_multiprocessing: - # if use num_parallel_workers is to large when python_multiprocessing=True which would cause - # OOM error get the num_shards - valid_num_shards = 1 - if isinstance(self.sampler, samplers.DistributedSampler): - valid_num_shards = self.sampler.num_shards - elif self.num_shards is not None: - valid_num_shards = self.num_shards - - # get process memory usage - process = psutil.Process(os.getpid()) - process_memory = process.memory_info().rss - sys_memory_free = psutil.virtual_memory().free - - total_memory_maybe_used = process_memory * self.num_parallel_workers * valid_num_shards - if total_memory_maybe_used / sys_memory_free > 0.85: - valid_num_worker = math.floor(sys_memory_free * 0.85 / valid_num_shards / process_memory) - valid_num_worker = 1 if valid_num_worker <= 0 else valid_num_worker - info = "GeneratorDataset num_parallel_workers: " + str(self.num_parallel_workers) + \ - " is too large which maybe cause a lot of memory occupation (>85%) or out of memory(OOM) " \ - "during multi process running. Therefore, it is recommended to reduce num_parallel_workers to " \ - + str(valid_num_worker) + " or smaller." - logger.warning(info) - - -class TFRecordDataset(SourceDataset, TextBaseDataset): - """ - A source dataset for reading and parsing datasets stored on disk in TFData format. - - The columns of generated dataset depend on the source TFRecord files. - - Args: - dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a - pattern of files. The list will be sorted in a lexicographical order. - schema (Union[str, Schema], optional): Path to the JSON schema file or schema object (default=None). - If the schema is not provided, the meta data from the TFData file is considered the schema. - columns_list (list[str], optional): List of columns to be read (default=None, read all columns). - num_samples (int, optional): The number of samples (rows) to be included in the dataset (default=None). - If num_samples is None and numRows(parsed from schema) does not exist, read the full dataset; - If num_samples is None and numRows(parsed from schema) is greater than 0, read numRows rows; - If both num_samples and numRows(parsed from schema) are greater than 0, read num_samples rows. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - shard_equal_rows (bool, optional): Get equal rows for all shards(default=False). If shard_equal_rows - is false, number of rows of each shard may be not equal, and may lead to a failure in distributed training. - When the number of samples of per TFRecord file are not equal, it is suggested to set to true. 
- This argument should only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_files are not valid or do not exist. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Examples: - >>> from mindspore import dtype as mstype - >>> - >>> tfrecord_dataset_dir = ["/path/to/tfrecord_dataset_file"] # contains 1 or multiple TFRecord files - >>> tfrecord_schema_file = "/path/to/tfrecord_schema_file" - >>> - >>> # 1) Get all rows from tfrecord_dataset_dir with no explicit schema. - >>> # The meta-data in the first row will be used as a schema. - >>> dataset = ds.TFRecordDataset(dataset_files=tfrecord_dataset_dir) - >>> - >>> # 2) Get all rows from tfrecord_dataset_dir with user-defined schema. - >>> schema = ds.Schema() - >>> schema.add_column(name='col_1d', de_type=mstype.int64, shape=[2]) - >>> dataset = ds.TFRecordDataset(dataset_files=tfrecord_dataset_dir, schema=schema) - >>> - >>> # 3) Get all rows from tfrecord_dataset_dir with schema file. - >>> dataset = ds.TFRecordDataset(dataset_files=tfrecord_dataset_dir, schema=tfrecord_schema_file) - """ - - @check_tfrecorddataset - def __init__(self, dataset_files, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, - shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_files = self._find_files(dataset_files) - self.dataset_files.sort() - - self.schema = schema - self.columns_list = replace_none(columns_list, []) - self.shard_equal_rows = replace_none(shard_equal_rows, False) - - if self.schema is not None and (self.num_samples is None or self.num_samples == 0): - self.num_samples = Schema.get_num_rows(self.schema) - - def parse(self, children=None): - schema = self.schema.cpp_schema if isinstance(self.schema, Schema) else self.schema - return cde.TFRecordNode(self.dataset_files, schema, self.columns_list, self.num_samples, self.shuffle_flag, - self.num_shards, self.shard_id, self.shard_equal_rows) - - -class ManifestDataset(MappableDataset): - """ - A source dataset for reading images from a Manifest file. - - The generated dataset has two columns: :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is of a scalar of uint64 type. - - Args: - dataset_file (str): File to be read. - usage (str, optional): Acceptable usages include `train`, `eval` and `inference` (default= `train`). - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, will include all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). 
- class_indexing (dict, optional): A str-to-int mapping from label name to index - (default=None, the folder names will be sorted alphabetically and each - class will be given a unique index starting from 0). - decode (bool, optional): decode the images after reading (default=False). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the max number of samples per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_files are not valid or do not exist. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - RuntimeError: If class_indexing is not a dictionary. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise. - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> manifest_dataset_dir = "/path/to/manifest_dataset_file" - >>> - >>> # 1) Read all samples specified in manifest_dataset_dir dataset with 8 threads for training - >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, usage="train", num_parallel_workers=8) - >>> - >>> # 2) Read samples (specified in manifest_file.manifest) for shard 0 in a 2-way distributed training setup - >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, num_shards=2, shard_id=0) - """ - - @check_manifestdataset - def __init__(self, dataset_file, usage="train", num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_file = dataset_file - self.decode = replace_none(decode, False) - self.usage = replace_none(usage, "train") - self.class_indexing = replace_none(class_indexing, {}) - - def parse(self, children=None): - return cde.ManifestNode(self.dataset_file, self.usage, self.sampler, self.class_indexing, self.decode) - - def get_class_indexing(self): - """ - Get the class index. - - Returns: - dict, a str-to-int mapping from label name to index. 
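When no `class_indexing` is passed, ManifestDataset numbers the label names alphabetically, and `get_class_indexing()` (documented just below) returns that mapping; passing an explicit dict pins the numbering instead. A hedged sketch with a placeholder manifest path and made-up label names:

.. code-block:: python

    import mindspore.dataset as ds

    manifest_file = "/path/to/manifest_dataset_file"  # hypothetical path

    # Pin label name -> index explicitly ("cat"/"dog" are made-up class names).
    dataset = ds.ManifestDataset(dataset_file=manifest_file, usage="train",
                                 class_indexing={"cat": 0, "dog": 1}, decode=True)

    print(dataset.get_class_indexing())  # {'cat': 0, 'dog': 1}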
- - Examples: - >>> manifest_dataset_dir = "/path/to/manifest_dataset_file" - >>> - >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir) - >>> class_indexing = dataset.get_class_indexing() - """ - if self.class_indexing is None or not self.class_indexing: - if self._class_indexing is None: - runtime_getter = self._init_tree_getters() - self._class_indexing = runtime_getter[0].GetClassIndexing() - self.class_indexing = {} - for pair in self._class_indexing: - self.class_indexing[pair[0]] = pair[1][0] - return self.class_indexing - - -class AGNewsDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses AG News datasets. - - The generated dataset has three columns: :py:obj:`[index, title, description]`. - The tensor of column :py:obj:`index` is of the string type. - The tensor of column :py:obj:`title` is of the string type. - The tensor of column :py:obj:`description` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Acceptable usages include `train`, `test` and `all` (default=None, all samples). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, 'num_samples' reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Examples: - >>> ag_news_dataset_dir = "/path/to/ag_news_dataset_file" - >>> dataset = ds.AGNewsDataset(dataset_dir=ag_news_dataset_dir, usage='all') - - About AGNews dataset: - - AG is a collection of over 1 million news articles. The news articles were collected - by ComeToMyHead from over 2,000 news sources in over 1 year of activity. ComeToMyHead - is an academic news search engine that has been in operation since July 2004. - The dataset is provided by academics for research purposes such as data mining - (clustering, classification, etc.), information retrieval (ranking, searching, etc.), - xml, data compression, data streaming, and any other non-commercial activities. - AG's news topic classification dataset was constructed by selecting the four largest - classes from the original corpus. Each class contains 30,000 training samples and - 1,900 test samples. The total number of training samples in train.csv is 120,000 - and the number of test samples in test.csv is 7,600. - - You can unzip the dataset files into the following structure and read by MindSpore's API: - - .. code-block:: - - . - └── ag_news_dataset_dir - ├── classes.txt - ├── train.csv - ├── test.csv - └── readme.txt - - Citation: - - .. 
code-block:: - - @misc{zhang2015characterlevel, - title={Character-level Convolutional Networks for Text Classification}, - author={Xiang Zhang and Junbo Zhao and Yann LeCun}, - year={2015}, - eprint={1509.01626}, - archivePrefix={arXiv}, - primaryClass={cs.LG} - } - """ - - @check_ag_news_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, - num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.AGNewsNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class AmazonReviewDataset(SourceDataset): - """ - A source dataset that reads and parses Amazon Review Polarity and Amazon Review Full datasets. - - The generated dataset has three columns: :py:obj:`[label, title, content]`. - The tensor of column :py:obj:`label` is of the string type. - The tensor of column :py:obj:`title` is of the string type. - The tensor of column :py:obj:`content` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset - or the Amazon Review Full dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` (default= `all`). - For Polarity dataset, `train` will read from 3,600,000 train samples, - `test` will read from 400,000 test samples, - `all` will read from all 4,000,000 samples. - For Full dataset, `train` will read from 3,000,000 train samples, - `test` will read from 650,000 test samples, - `all` will read from all 3,650,000 samples (default=None, all samples). - num_samples (int, optional): Number of samples (rows) to be read (default=None, reads the full dataset). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the mindspore.dataset.config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. 
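For file-based text readers such as AGNewsDataset, `shuffle` accepts more than a boolean: `Shuffle.FILES` reshuffles only the file order, while the default `Shuffle.GLOBAL` reshuffles rows as well. A hedged usage sketch (the directory path is a placeholder):

.. code-block:: python

    import mindspore.dataset as ds

    ag_news_dir = "/path/to/ag_news_dataset_dir"  # hypothetical path

    globally_shuffled = ds.AGNewsDataset(dataset_dir=ag_news_dir, usage="train",
                                         shuffle=ds.Shuffle.GLOBAL)
    files_only = ds.AGNewsDataset(dataset_dir=ag_news_dir, usage="train",
                                  shuffle=ds.Shuffle.FILES)

    for row in files_only.create_dict_iterator(output_numpy=True):
        print(row["index"], row["title"], row["description"])
        break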
- - Examples: - >>> amazon_review_dataset_dir = "/path/to/amazon_review_dataset_dir" - >>> dataset = ds.AmazonReviewDataset(dataset_dir=amazon_review_dataset_dir, usage='all') - - About AmazonReview Dataset: - - The Amazon reviews full dataset consists of reviews from Amazon. The data span a period of 18 years, including ~35 - million reviews up to March 2013. Reviews include product and user information, ratings, and a plaintext review. - The dataset is mainly used for text classification, given the content and title, predict the correct star rating. - - The Amazon reviews polarity dataset is constructed by taking review score 1 and 2 as negative, 4 and 5 as positive. - Samples of score 3 is ignored. In the dataset, class 1 is the negative and class 2 is the positive. - - The Amazon Reviews Polarity and Amazon Reviews Full datasets have the same directory structures. - You can unzip the dataset files into the following structure and read by MindSpore's API: - - .. code-block:: - - . - └── amazon_review_dir - ├── train.csv - ├── test.csv - └── readme.txt - - Citation: - - .. code-block:: - - @article{zhang2015character, - title={Character-level convolutional networks for text classification}, - author={Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - journal={Advances in neural information processing systems}, - volume={28}, - pages={649--657}, - year={2015} - } - """ - - @check_amazon_review_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - - def parse(self, children=None): - return cde.AmazonReviewNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class Cifar10Dataset(MappableDataset): - """ - A source dataset for reading and parsing Cifar10 dataset. - This api only supports parsing Cifar10 file in binary version now. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 50,000 - train samples, `test` will read from 10,000 test samples, `all` will read from all 60,000 samples - (default=None, all samples). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. 
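Sharding the Amazon Review files across a data-parallel job uses the same `num_shards` / `shard_id` convention as the other readers; a sketch for rank 0 of a two-way split (the path is a placeholder):

.. code-block:: python

    import mindspore.dataset as ds

    amazon_dir = "/path/to/amazon_review_dataset_dir"  # hypothetical path

    shard0 = ds.AmazonReviewDataset(dataset_dir=amazon_dir, usage="train",
                                    num_shards=2, shard_id=0)
    print("rows in this shard:", shard0.get_dataset_size())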
- cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory" - >>> - >>> # 1) Get all samples from CIFAR10 dataset in sequence - >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from CIFAR10 dataset - >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_samples=350, shuffle=True) - >>> - >>> # 3) Get samples from CIFAR10 dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_shards=2, shard_id=0) - >>> - >>> # In CIFAR10 dataset, each dictionary has keys "image" and "label" - - About CIFAR-10 dataset: - - The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, - with 6000 images per class. There are 50000 training images and 10000 test images. - The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. - - Here is the original CIFAR-10 dataset structure. - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── cifar-10-batches-bin - ├── data_batch_1.bin - ├── data_batch_2.bin - ├── data_batch_3.bin - ├── data_batch_4.bin - ├── data_batch_5.bin - ├── test_batch.bin - ├── readme.html - └── batches.meta.txt - - Citation: - - .. code-block:: - - @techreport{Krizhevsky09, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009}, - howpublished = {http://www.cs.toronto.edu/~kriz/cifar.html} - } - """ - - @check_mnist_cifar_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.Cifar10Node(self.dataset_dir, self.usage, self.sampler) - - -class Cifar100Dataset(MappableDataset): - """ - A source dataset for reading and parsing Cifar100 dataset. 
- - The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensors of columns :py:obj:`coarse_label` and :py:obj:`fine_label` are each a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read from 50,000 - train samples, `test` will read from 10,000 test samples, `all` will read from all 60,000 samples - (default=None, all samples). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> cifar100_dataset_dir = "/path/to/cifar100_dataset_directory" - >>> - >>> # 1) Get all samples from CIFAR100 dataset in sequence - >>> dataset = ds.Cifar100Dataset(dataset_dir=cifar100_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from CIFAR100 dataset - >>> dataset = ds.Cifar100Dataset(dataset_dir=cifar100_dataset_dir, num_samples=350, shuffle=True) - >>> - >>> # In CIFAR100 dataset, each dictionary has 3 keys: "image", "fine_label" and "coarse_label" - - About CIFAR-100 dataset: - - This dataset is just like CIFAR-10, except it has 100 classes containing 600 images - each. There are 500 training images and 100 testing images per class. The 100 classes in - the CIFAR-100 are grouped into 20 superclasses.
Each image comes with a "fine" label (the - class to which it belongs) and a "coarse" label (the superclass to which it belongs). - - Here is the original CIFAR-100 dataset structure. - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── cifar-100-binary - ├── train.bin - ├── test.bin - ├── fine_label_names.txt - └── coarse_label_names.txt - - Citation: - - .. code-block:: - - @techreport{Krizhevsky09, - author = {Alex Krizhevsky}, - title = {Learning multiple layers of features from tiny images}, - institution = {}, - year = {2009}, - howpublished = {http://www.cs.toronto.edu/~kriz/cifar.html} - } - """ - - @check_mnist_cifar_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.Cifar100Node(self.dataset_dir, self.usage, self.sampler) - - -class RandomDataset(SourceDataset): - """ - A source dataset that generates random data. - - Args: - total_rows (int, optional): Number of samples for the dataset to generate - (default=None, number of samples is random). - schema (Union[str, Schema], optional): Path to the JSON schema file or schema object (default=None). - If the schema is not provided, the random dataset generates a random schema. - columns_list (list[str], optional): List of columns to be read (default=None, read all columns) - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, all samples). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, 'num_samples' reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - """ - - @check_random_dataset - def __init__(self, total_rows=None, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, - cache=None, shuffle=None, num_shards=None, shard_id=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.total_rows = total_rows - if schema is not None: - self.total_rows = replace_none(total_rows, Schema.get_num_rows(schema)) - self.schema = schema - self.columns_list = replace_none(columns_list, []) - - def parse(self, children=None): - schema = self.schema.cpp_schema if isinstance(self.schema, Schema) else self.schema - return cde.RandomNode(self.total_rows, schema, self.columns_list) - - class Schema: """ Class to represent a schema of a dataset. 
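The RandomDataset and Schema classes removed above are typically used together: a Schema declares the column layout, and RandomDataset fills those columns with random values, which is handy for smoke-testing an input pipeline. Below is a minimal sketch of that combination; the column names, shapes, and row count are illustrative assumptions only, not part of the moved code.

    import mindspore as ms
    import mindspore.dataset as ds

    # Declare an illustrative two-column layout: a 32x32x3 uint8 "image" and a length-1 int32 "label".
    schema = ds.Schema()
    schema.add_column(name="image", de_type=ms.uint8, shape=[32, 32, 3])
    schema.add_column(name="label", de_type=ms.int32, shape=[1])

    # RandomDataset generates rows that match the declared schema; total_rows bounds the dataset size.
    dataset = ds.RandomDataset(schema=schema, total_rows=4, num_parallel_workers=1)

    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(row["image"].shape, row["label"])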
@@ -6382,2971 +3545,6 @@ class Schema: return schema_obj.cpp_schema.get_num_rows() -class UDPOSDataset(SourceDataset): - """ - A source dataset that reads and parses UDPOS dataset. - - The generated dataset has three columns: :py:obj:`[word, universal, stanford]`. - The tensor of column :py:obj:`word` is of the string type. - The tensor of column :py:obj:`universal` is of the string type. - The tensor of column :py:obj:`stanford` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` will read from - 12,543 train samples, `test` will read from 2,077 test samples, `valid` will read from 2,002 validation samples, - `all` will read from all 16,622 samples (default=None, all samples). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> udpos_dataset_dir = "/path/to/udpos_dataset_dir" - >>> dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage='all') - """ - - @check_udpos_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, - shard_id=None, num_parallel_workers=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - - def parse(self, children=None): - return cde.UDPOSNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class USPSDataset(SourceDataset): - """ - A source dataset for reading and parsing the USPS dataset. - - The generated dataset has two columns: :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be "train", "test" or "all".
"train" will read from 7,291 - train samples, "test" will read from 2,007 test samples, "all" will read from all 9,298 samples. - (default=None, will read all samples) - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir is not valid or does not exist or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If usage is invalid. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Examples: - >>> usps_dataset_dir = "/path/to/usps_dataset_directory" - >>> - >>> # Read 3 samples from USPS dataset - >>> dataset = ds.USPSDataset(dataset_dir=usps_dataset_dir, num_samples=3) - >>> - >>> # Note: In USPS dataset, each dictionary has keys "image" and "label" - - About USPS dataset: - - USPS is a digit dataset automatically scanned from envelopes by the U.S. Postal Service - containing a total of 9,298 16×16 pixel grayscale samples. - The images are centered, normalized and show a broad range of font styles. - - Here is the original USPS dataset structure. - You can download and unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── usps_dataset_dir - ├── usps - ├── usps.t - - Citation: - - .. 
code-block:: - - @article{hull1994database, - title={A database for handwritten text recognition research}, - author={Hull, Jonathan J.}, - journal={IEEE Transactions on pattern analysis and machine intelligence}, - volume={16}, - number={5}, - pages={550--554}, - year={1994}, - publisher={IEEE} - } - """ - - @check_usps_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.USPSNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class WikiTextDataset(SourceDataset): - """ - A source dataset that reads and parses WikiText2 and WikiText103 datasets. - - The generated dataset has one column :py:obj:`[text]`. - The tensor of column :py:obj:`text` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Acceptable usages include `train`, `test`, 'valid' and `all`(default=None, all samples). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, 'num_samples' reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Examples: - >>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory" - >>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all') - - About WikiTextDataset dataset: - - The WikiText Long Term Dependency Language Modeling Dataset is an English lexicon containing 100 million words. - These terms are drawn from Wikipedia's premium and benchmark articles, including versions of Wikitext2 and - Wikitext103. For WikiText2, it has 36718 lines in wiki.train.tokens, 4358 lines in wiki.test.tokens and - 3760 lines in wiki.valid.tokens. For WikiText103, it has 1801350 lines in wiki.train.tokens, 4358 lines in - wiki.test.tokens and 3760 lines in wiki.valid.tokens. - - Here is the original WikiText dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── WikiText2/WikiText103 - ├── wiki.train.tokens - ├── wiki.test.tokens - ├── wiki.valid.tokens - - Citation: - - .. 
code-block:: - - @article{merity2016pointer, - title={Pointer sentinel mixture models}, - author={Merity, Stephen and Xiong, Caiming and Bradbury, James and Socher, Richard}, - journal={arXiv preprint arXiv:1609.07843}, - year={2016} - } - """ - - @check_wiki_text_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.WikiTextNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class VOCDataset(MappableDataset): - """ - A source dataset for reading and parsing VOC dataset. - - The generated dataset with different task setting has different output columns: - - - task = :py:obj:`Detection`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \ - :py:obj:`[label, dtype=uint32]`, :py:obj:`[difficult, dtype=uint32]`, :py:obj:`[truncate, dtype=uint32]`. - - task = :py:obj:`Segmentation`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[target,dtype=uint8]`. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - task (str, optional): Set the task type of reading voc data, now only support `Segmentation` or `Detection` - (default= `Segmentation`). - usage (str, optional): Set the task type of ImageSets(default= `train`). If task is `Segmentation`, image and - annotation list will be loaded in ./ImageSets/Segmentation/usage + ".txt"; If task is `Detection`, image and - annotation list will be loaded in ./ImageSets/Main/usage + ".txt"; if task and usage are not set, image and - annotation list will be loaded in ./ImageSets/Segmentation/train.txt as default. - class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in - `Detection` task (default=None, the folder names will be sorted alphabetically and each - class will be given a unique index starting from 0). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the dataset - (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named - :py:obj:`[_meta-filename, dtype=string]` will be output at the end (default=False). 
- - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If the XML file of Annotations has an invalid format. - RuntimeError: If the XML file of Annotations lacks the attribute `object`. - RuntimeError: If the XML file of Annotations lacks the attribute `bndbox`. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If task is not equal to 'Segmentation' or 'Detection'. - ValueError: If task equals 'Segmentation' but class_indexing is not None. - ValueError: If the txt file related to the mode does not exist. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op - is added to remove the prefix('_meta-'). - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> voc_dataset_dir = "/path/to/voc_dataset_directory" - >>> - >>> # 1) Read VOC data for segmentation training - >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Segmentation", usage="train") - >>> - >>> # 2) Read VOC data for detection training - >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train") - >>> - >>> # 3) Read all VOC dataset samples in voc_dataset_dir with 8 threads in random order - >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train", - ... num_parallel_workers=8) - >>> - >>> # 4) Read then decode all VOC dataset samples in voc_dataset_dir in sequence - >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train", - ... decode=True, shuffle=False) - >>> - >>> # In VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target" - >>> # In VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation" - - About VOC dataset: - - The PASCAL Visual Object Classes (VOC) challenge is a benchmark in visual - object category recognition and detection, providing the vision and machine - learning communities with a standard dataset of images and annotation, and - standard evaluation procedures. - - You can unzip the original VOC-2012 dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── voc2012_dataset_dir - ├── Annotations - │ ├── 2007_000027.xml - │ ├── 2007_000032.xml - │ ├── ... - ├── ImageSets - │ ├── Action - │ ├── Layout - │ ├── Main - │ └── Segmentation - ├── JPEGImages - │ ├── 2007_000027.jpg - │ ├── 2007_000032.jpg - │ ├── ... - ├── SegmentationClass - │ ├── 2007_000032.png - │ ├── 2007_000033.png - │ ├── ... - └── SegmentationObject - ├── 2007_000032.png - ├── 2007_000033.png - ├── ... - - Citation: - - ..
code-block:: - - @article{Everingham10, - author = {Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.}, - title = {The Pascal Visual Object Classes (VOC) Challenge}, - journal = {International Journal of Computer Vision}, - volume = {88}, - year = {2012}, - number = {2}, - month = {jun}, - pages = {303--338}, - biburl = {http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.html#bibtex}, - howpublished = {http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html} - } - """ - - @check_vocdataset - def __init__(self, dataset_dir, task="Segmentation", usage="train", class_indexing=None, num_samples=None, - num_parallel_workers=None, shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None, - cache=None, extra_metadata=False): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.task = replace_none(task, "Segmentation") - self.usage = replace_none(usage, "train") - self.class_indexing = replace_none(class_indexing, {}) - self.decode = replace_none(decode, False) - self.extra_metadata = extra_metadata - - def parse(self, children=None): - return cde.VOCNode(self.dataset_dir, self.task, self.usage, self.class_indexing, self.decode, self.sampler, - self.extra_metadata) - - def get_class_indexing(self): - """ - Get the class index. - - Returns: - dict, a str-to-int mapping from label name to index. - - Examples: - >>> voc_dataset_dir = "/path/to/voc_dataset_directory" - >>> - >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection") - >>> class_indexing = dataset.get_class_indexing() - """ - if self.task != "Detection": - raise NotImplementedError("Only 'Detection' support get_class_indexing.") - if self.class_indexing is None or not self.class_indexing: - if self._class_indexing is None: - runtime_getter = self._init_tree_getters() - self._class_indexing = runtime_getter[0].GetClassIndexing() - self.class_indexing = {} - for pair in self._class_indexing: - self.class_indexing[pair[0]] = pair[1][0] - return self.class_indexing - - -class _Caltech101Dataset: - """ - Mainly for loading Caltech101 Dataset, and return two rows each time. 
- """ - - def __init__(self, dataset_dir, target_type="category", decode=False): - self.dataset_dir = os.path.realpath(dataset_dir) - self.image_dir = os.path.join(self.dataset_dir, "101_ObjectCategories") - self.annotation_dir = os.path.join(self.dataset_dir, "Annotations") - self.target_type = target_type - if self.target_type == "category": - self.column_names = ["image", "category"] - elif self.target_type == "annotation": - self.column_names = ["image", "annotation"] - else: - self.column_names = ["image", "category", "annotation"] - self.decode = decode - self.classes = sorted(os.listdir(self.image_dir)) - if "BACKGROUND_Google" in self.classes: - self.classes.remove("BACKGROUND_Google") - name_map = {"Faces": "Faces_2", - "Faces_easy": "Faces_3", - "Motorbikes": "Motorbikes_16", - "airplanes": "Airplanes_Side_2"} - self.annotation_classes = [name_map[class_name] if class_name in name_map else class_name - for class_name in self.classes] - self.image_index = [] - self.image_label = [] - for i, image_class in enumerate(self.classes): - sub_dir = os.path.join(self.image_dir, image_class) - if not os.path.isdir(sub_dir) or not os.access(sub_dir, os.R_OK): - continue - num_images = len(os.listdir(sub_dir)) - self.image_index.extend(range(1, num_images + 1)) - self.image_label.extend(num_images * [i]) - - def __getitem__(self, index): - image_file = os.path.join(self.image_dir, self.classes[self.image_label[index]], - "image_{:04d}.jpg".format(self.image_index[index])) - if not os.path.exists(image_file): - raise ValueError("The image file {} does not exist or permission denied!".format(image_file)) - if self.decode: - image = np.asarray(Image.open(image_file).convert("RGB")) - else: - image = np.fromfile(image_file, dtype=np.uint8) - - if self.target_type == "category": - return image, self.image_label[index] - annotation_file = os.path.join(self.annotation_dir, self.annotation_classes[self.image_label[index]], - "annotation_{:04d}.mat".format(self.image_index[index])) - if not os.path.exists(annotation_file): - raise ValueError("The annotation file {} does not exist or permission denied!".format(annotation_file)) - annotation = loadmat(annotation_file)["obj_contour"] - - if self.target_type == "annotation": - return image, annotation - return image, self.image_label[index], annotation - - def __len__(self): - return len(self.image_index) - - -class Caltech101Dataset(GeneratorDataset): - """ - A source dataset that reads and parses Caltech101 dataset. - - The columns of the generated dataset depend on the value of `target_type`. - When `target_type` is `category`, the columns are :py:obj:`[image, category]`. - When `target_type` is `annotation`, the columns are :py:obj:`[image, annotation]`. - When `target_type` is `all`, the columns are :py:obj:`[image, category, annotation]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`category` is of the uint32 type. - The tensor of column :py:obj:`annotation` is a 2-dimensional ndarray that stores the contour of the image - and consists of a series of points. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. This root directory contains two - subdirectories, one is called 101_ObjectCategories, which stores images, - and the other is called Annotations, which stores annotations. - target_type (str, optional): Target of the image. If target_type is "category", return category represents - the target class. If target_type is "annotation", return annotation. 
- If target_type is "all", return category and annotation (default=None, means "category"). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data (default=1). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - decode (bool, optional): Whether or not to decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If target_type is not set correctly. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> caltech101_dataset_directory = "/path/to/caltech101_dataset_directory" - >>> - >>> # 1) Read all samples (image files) in caltech101_dataset_directory with 8 threads - >>> dataset = ds.Caltech101Dataset(dataset_dir=caltech101_dataset_directory, num_parallel_workers=8) - >>> - >>> # 2) Read all samples (image files) with the target_type "annotation" - >>> dataset = ds.Caltech101Dataset(dataset_dir=caltech101_dataset_directory, target_type="annotation") - - About Caltech101Dataset: - - Pictures of objects belonging to 101 categories. About 40 to 800 images per category. - Most categories have about 50 images. Collected in September 2003 by Fei-Fei Li, Marco Andreetto, - and Marc 'Aurelio Ranzato. The size of each image is roughly 300 x 200 pixels. - The official provides the contour data of each object in each picture, which is the annotation. - - .. code-block:: - - . - └── caltech101_dataset_directory - ├── 101_ObjectCategories - │ ├── Faces - │ │ ├── image_0001.jpg - │ │ ├── image_0002.jpg - │ │ ... - │ ├── Faces_easy - │ │ ├── image_0001.jpg - │ │ ├── image_0002.jpg - │ │ ... - │ ├── ... - └── Annotations - ├── Airplanes_Side_2 - │ ├── annotation_0001.mat - │ ├── annotation_0002.mat - │ ... - ├── Faces_2 - │ ├── annotation_0001.mat - │ ├── annotation_0002.mat - │ ... - ├── ... - - Citation: - - .. 
code-block:: - - @article{FeiFei2004LearningGV, - author = {Li Fei-Fei and Rob Fergus and Pietro Perona}, - title = {Learning Generative Visual Models from Few Training Examples: - An Incremental Bayesian Approach Tested on 101 Object Categories}, - journal = {Computer Vision and Pattern Recognition Workshop}, - year = {2004}, - url = {http://www.vision.caltech.edu/Image_Datasets/Caltech101/}, - } - """ - - @check_caltech101_dataset - def __init__(self, dataset_dir, target_type=None, num_samples=None, num_parallel_workers=1, - shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): - self.dataset_dir = dataset_dir - self.target_type = replace_none(target_type, "category") - self.decode = replace_none(decode, False) - dataset = _Caltech101Dataset(self.dataset_dir, self.target_type, self.decode) - super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - def get_class_indexing(self): - """ - Get the class index. - - Returns: - dict, a str-to-int mapping from label name to index. - """ - class_dict = {'Faces': 0, 'Faces_easy': 1, 'Leopards': 2, 'Motorbikes': 3, 'accordion': 4, 'airplanes': 5, - 'anchor': 6, 'ant': 7, 'barrel': 8, 'bass': 9, 'beaver': 10, 'binocular': 11, 'bonsai': 12, - 'brain': 13, 'brontosaurus': 14, 'buddha': 15, 'butterfly': 16, 'camera': 17, 'cannon': 18, - 'car_side': 19, 'ceiling_fan': 20, 'cellphone': 21, 'chair': 22, 'chandelier': 23, - 'cougar_body': 24, 'cougar_face': 25, 'crab': 26, 'crayfish': 27, 'crocodile': 28, - 'crocodile_head': 29, 'cup': 30, 'dalmatian': 31, 'dollar_bill': 32, 'dolphin': 33, - 'dragonfly': 34, 'electric_guitar': 35, 'elephant': 36, 'emu': 37, 'euphonium': 38, 'ewer': 39, - 'ferry': 40, 'flamingo': 41, 'flamingo_head': 42, 'garfield': 43, 'gerenuk': 44, 'gramophone': 45, - 'grand_piano': 46, 'hawksbill': 47, 'headphone': 48, 'hedgehog': 49, 'helicopter': 50, 'ibis': 51, - 'inline_skate': 52, 'joshua_tree': 53, 'kangaroo': 54, 'ketch': 55, 'lamp': 56, 'laptop': 57, - 'llama': 58, 'lobster': 59, 'lotus': 60, 'mandolin': 61, 'mayfly': 62, 'menorah': 63, - 'metronome': 64, 'minaret': 65, 'nautilus': 66, 'octopus': 67, 'okapi': 68, 'pagoda': 69, - 'panda': 70, 'pigeon': 71, 'pizza': 72, 'platypus': 73, 'pyramid': 74, 'revolver': 75, - 'rhino': 76, 'rooster': 77, 'saxophone': 78, 'schooner': 79, 'scissors': 80, 'scorpion': 81, - 'sea_horse': 82, 'snoopy': 83, 'soccer_ball': 84, 'stapler': 85, 'starfish': 86, - 'stegosaurus': 87, 'stop_sign': 88, 'strawberry': 89, 'sunflower': 90, 'tick': 91, - 'trilobite': 92, 'umbrella': 93, 'watch': 94, 'water_lilly': 95, 'wheelchair': 96, 'wild_cat': 97, - 'windsor_chair': 98, 'wrench': 99, 'yin_yang': 100} - return class_dict - - -class Caltech256Dataset(MappableDataset): - """ - A source dataset that reads and parses Caltech256 dataset. - - The generated dataset has two columns: :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, set in the config). 
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - decode (bool, optional): Whether or not to decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> caltech256_dataset_dir = "/path/to/caltech256_dataset_directory" - >>> - >>> # 1) Read all samples (image files) in caltech256_dataset_dir with 8 threads - >>> dataset = ds.Caltech256Dataset(dataset_dir=caltech256_dataset_dir, num_parallel_workers=8) - - About Caltech256Dataset: - - Caltech-256 is an object recognition dataset containing 30,607 real-world images, of different sizes, - spanning 257 classes (256 object classes and an additional clutter class). - Each class is represented by at least 80 images. The dataset is a superset of the Caltech-101 dataset. - - .. code-block:: - - . - └── caltech256_dataset_directory - ├── 001.ak47 - │ ├── 001_0001.jpg - │ ├── 001_0002.jpg - │ ... - ├── 002.american-flag - │ ├── 002_0001.jpg - │ ├── 002_0002.jpg - │ ... - ├── 003.backpack - │ ├── 003_0001.jpg - │ ├── 003_0002.jpg - │ ... - ├── ... - - Citation: - - .. 
code-block:: - - @article{griffin2007caltech, - title = {Caltech-256 object category dataset}, - added-at = {2021-01-21T02:54:42.000+0100}, - author = {Griffin, Gregory and Holub, Alex and Perona, Pietro}, - biburl = {https://www.bibsonomy.org/bibtex/21f746f23ff0307826cca3e3be45f8de7/s364315}, - interhash = {bfe1e648c1778c04baa60f23d1223375}, - intrahash = {1f746f23ff0307826cca3e3be45f8de7}, - publisher = {California Institute of Technology}, - timestamp = {2021-01-21T02:54:42.000+0100}, - year = {2007} - } - """ - - @check_caltech256_dataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.Caltech256Node(self.dataset_dir, self.decode, self.sampler) - - -class CocoDataset(MappableDataset): - """ - A source dataset for reading and parsing COCO dataset. - - CocoDataset supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation and - Panoptic Segmentation of 2017 Train/Val/Test dataset. - - The generated dataset with different task setting has different output columns: - - - task = :py:obj:`Detection`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \ - :py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`. - - task = :py:obj:`Stuff`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[segmentation,dtype=float32]`, \ - :py:obj:`[iscrowd,dtype=uint32]`. - - task = :py:obj:`Keypoint`, output columns: :py:obj:`[image, dtype=uint8]`, \ - :py:obj:`[keypoints, dtype=float32]`, :py:obj:`[num_keypoints, dtype=uint32]`. - - task = :py:obj:`Panoptic`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \ - :py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`, :py:obj:`[area, dtype=uint32]`. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - annotation_file (str): Path to the annotation JSON file. - task (str, optional): Set the task type for reading COCO data. Supported task types: - `Detection`, `Stuff`, `Panoptic` and `Keypoint` (default= `Detection`). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the configuration file). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the dataset - (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. 
- (default=None, which means no cache is used). - extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be - output at the end :py:obj:`[_meta-filename, dtype=string]` (default=False). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - RuntimeError: If parsing the JSON file failed. - ValueError: If task is not in [`Detection`, `Stuff`, `Panoptic`, `Keypoint`]. - ValueError: If annotation_file does not exist. - ValueError: If dataset_dir does not exist. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op is added - to remove the prefix('_meta-'). - - CocoDataset doesn't support PKSampler. - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> coco_dataset_dir = "/path/to/coco_dataset_directory/images" - >>> coco_annotation_file = "/path/to/coco_dataset_directory/annotation_file" - >>> - >>> # 1) Read COCO data for Detection task - >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, - ... annotation_file=coco_annotation_file, - ... task='Detection') - >>> - >>> # 2) Read COCO data for Stuff task - >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, - ... annotation_file=coco_annotation_file, - ... task='Stuff') - >>> - >>> # 3) Read COCO data for Panoptic task - >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, - ... annotation_file=coco_annotation_file, - ... task='Panoptic') - >>> - >>> # 4) Read COCO data for Keypoint task - >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, - ... annotation_file=coco_annotation_file, - ... task='Keypoint') - >>> - >>> # In COCO dataset, each dictionary has keys "image" and "annotation" - - About COCO dataset: - - COCO (Microsoft Common Objects in Context) is a large-scale object detection, segmentation, and captioning dataset - with several features: Object segmentation, Recognition in context, Superpixel stuff segmentation, - 330K images (>200K labeled), 1.5 million object instances, 80 object categories, 91 stuff categories, - 5 captions per image, 250,000 people with keypoints. In contrast to the popular ImageNet dataset, COCO has fewer - categories but more instances per category. - - You can unzip the original COCO-2017 dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── coco_dataset_directory - ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000000025.jpg - │ ├── ... - ├── test2017 - │ ├── 000000000001.jpg - │ ├── 000000058136.jpg - │ ├── ...
- ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000057027.jpg - │ ├── ... - └── annotations - ├── captions_train2017.json - ├── captions_val2017.json - ├── instances_train2017.json - ├── instances_val2017.json - ├── person_keypoints_train2017.json - └── person_keypoints_val2017.json - - Citation: - - .. code-block:: - - @article{DBLP:journals/corr/LinMBHPRDZ14, - author = {Tsung{-}Yi Lin and Michael Maire and Serge J. Belongie and - Lubomir D. Bourdev and Ross B. Girshick and James Hays and - Pietro Perona and Deva Ramanan and Piotr Doll{\'{a}}r and C. Lawrence Zitnick}, - title = {Microsoft {COCO:} Common Objects in Context}, - journal = {CoRR}, - volume = {abs/1405.0312}, - year = {2014}, - url = {http://arxiv.org/abs/1405.0312}, - archivePrefix = {arXiv}, - eprint = {1405.0312}, - timestamp = {Mon, 13 Aug 2018 16:48:13 +0200}, - biburl = {https://dblp.org/rec/journals/corr/LinMBHPRDZ14.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} - } - """ - - @check_cocodataset - def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None, - shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None, cache=None, - extra_metadata=False): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.annotation_file = annotation_file - self.task = replace_none(task, "Detection") - self.decode = replace_none(decode, False) - self.extra_metadata = extra_metadata - - def parse(self, children=None): - return cde.CocoNode(self.dataset_dir, self.annotation_file, self.task, self.decode, self.sampler, - self.extra_metadata) - - def get_class_indexing(self): - """ - Get the class index. - - Returns: - dict, a str-to-list mapping from label name to index. - - Examples: - >>> coco_dataset_dir = "/path/to/coco_dataset_directory/images" - >>> coco_annotation_file = "/path/to/coco_dataset_directory/annotation_file" - >>> - >>> # Read COCO data for Detection task - >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, - ... annotation_file=coco_annotation_file, - ... task='Detection') - >>> - >>> class_indexing = dataset.get_class_indexing() - """ - if self.task not in {"Detection", "Panoptic"}: - raise NotImplementedError("Only 'Detection' and 'Panoptic' support get_class_indexing.") - if self._class_indexing is None: - runtime_getter = self._init_tree_getters() - self._class_indexing = dict(runtime_getter[0].GetClassIndexing()) - return self._class_indexing - - -class CoNLL2000Dataset(SourceDataset): - """ - A source dataset that reads and parses CoNLL2000 dataset. - - The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]`. - The tensor of column :py:obj:`word` is of the string type. - The tensor of column :py:obj:`pos_tag` is of the string type. - The tensor of column :py:obj:`chunk_tag` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test`, or `all`. `train` will read from - 8,936 train samples, `test` will read from 2,012 test samples, - `all` will read from all 10,948 samples (default=None, all samples). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
- shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> conll2000_dataset_dir = "/path/to/conll2000_dataset_dir" - >>> dataset = ds.CoNLL2000Dataset(dataset_dir=conll2000_dataset_dir, usage='all') - """ - - @check_conll2000_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, - shard_id=None, num_parallel_workers=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - - def parse(self, children=None): - return cde.CoNLL2000Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class CelebADataset(MappableDataset): - """ - A source dataset for reading and parsing CelebA dataset. - Currently it only supports reading `list_attr_celeba.txt`, which contains the attribute annotations of the dataset. - - The generated dataset has two columns: :py:obj:`[image, attr]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`attr` is of the uint32 type and one-hot encoded. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in - the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). - usage (str, optional): Specify the `train`, `valid`, `test` part or `all` parts of dataset - (default= `all`, will read all samples). - sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). - decode (bool, optional): Decode the images after reading (default=False). - extensions (list[str], optional): List of file extensions to be included in the dataset (default=None). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will include all images). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> celeba_dataset_dir = "/path/to/celeba_dataset_directory" - >>> - >>> # Read 5 samples from CelebA dataset - >>> dataset = ds.CelebADataset(dataset_dir=celeba_dataset_dir, usage='train', num_samples=5) - >>> - >>> # Note: In celeba dataset, each data dictionary owns keys "image" and "attr" - - About CelebA dataset: - - CelebFaces Attributes Dataset (CelebA) is a large-scale face attributes dataset - with more than 200K celebrity images, each with 40 attribute annotations. - - The images in this dataset cover large pose variations and background clutter. - CelebA has large diversities, large quantities, and rich annotations, including - - * 10,177 number of identities, - * 202,599 number of face images, - * 5 landmark locations, 40 binary attributes annotations per image. - - The dataset can be employed as the training and test sets for the following computer - vision tasks: face attribute recognition, face detection, landmark (or facial part) - localization, and face editing & synthesis. - - Original CelebA dataset structure: - - .. code-block:: - - . - └── CelebA - ├── README.md - ├── Img - │ ├── img_celeba.7z - │ ├── img_align_celeba_png.7z - │ └── img_align_celeba.zip - ├── Eval - │ └── list_eval_partition.txt - └── Anno - ├── list_landmarks_celeba.txt - ├── list_landmarks_align_celeba.txt - ├── list_bbox_celeba.txt - ├── list_attr_celeba.txt - └── identity_CelebA.txt - - You can unzip the dataset files into the following structure and read by MindSpore's API. - - .. code-block:: - - . - └── celeba_dataset_directory - ├── list_attr_celeba.txt - ├── 000001.jpg - ├── 000002.jpg - ├── 000003.jpg - ├── ... - - Citation: - - .. 
code-block:: - - @article{DBLP:journals/corr/LiuLWT14, - author = {Ziwei Liu and Ping Luo and Xiaogang Wang and Xiaoou Tang}, - title = {Deep Learning Face Attributes in the Wild}, - journal = {CoRR}, - volume = {abs/1411.7766}, - year = {2014}, - url = {http://arxiv.org/abs/1411.7766}, - archivePrefix = {arXiv}, - eprint = {1411.7766}, - timestamp = {Tue, 10 Dec 2019 15:37:26 +0100}, - biburl = {https://dblp.org/rec/journals/corr/LiuLWT14.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org}, - howpublished = {http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html} - } - """ - - @check_celebadataset - def __init__(self, dataset_dir, num_parallel_workers=None, shuffle=None, usage='all', sampler=None, decode=False, - extensions=None, num_samples=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.decode = replace_none(decode, False) - self.extensions = replace_none(extensions, []) - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - if self.usage != "all": - dataset_dir = os.path.realpath(self.dataset_dir) - partition_file = os.path.join(dataset_dir, "list_eval_partition.txt") - if os.path.exists(partition_file) is False: - raise RuntimeError("Partition file can not be found when usage is not 'all'.") - return cde.CelebANode(self.dataset_dir, self.usage, self.sampler, self.decode, self.extensions) - - -class CLUEDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses CLUE datasets. - Supported CLUE classification tasks: `AFQMC`, `TNEWS`, `IFLYTEK`, `CMNLI`, `WSC` and `CSL`. - - The generated dataset with different task setting has different output columns: - - - task = :py:obj:`AFQMC` - - usage = :py:obj:`train`, output columns: :py:obj:`[sentence1, dtype=string]`, \ - :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. - - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint8]`, \ - :py:obj:`[sentence1, dtype=string]`, :py:obj:`[sentence2, dtype=string]`. - - usage = :py:obj:`eval`, output columns: :py:obj:`[sentence1, dtype=string]`, \ - :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. - - - task = :py:obj:`TNEWS` - - usage = :py:obj:`train`, output columns: :py:obj:`[label, dtype=string]`, \ - :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, :py:obj:`[keywords, dtype=string]`. - - usage = :py:obj:`test`, output columns: :py:obj:`[label, dtype=string]`, \ - :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, :py:obj:`[keywords, dtype=string]`. - - usage = :py:obj:`eval`, output columns: :py:obj:`[label, dtype=string]`, \ - :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, :py:obj:`[keywords, dtype=string]`. - - - task = :py:obj:`IFLYTEK` - - usage = :py:obj:`train`, output columns: :py:obj:`[label, dtype=string]`, \ - :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`. - - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=string]`, \ - :py:obj:`[sentence, dtype=string]`. - - usage = :py:obj:`eval`, output columns: :py:obj:`[label, dtype=string]`, \ - :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`. 
- - - task = :py:obj:`CMNLI` - - usage = :py:obj:`train`, output columns: :py:obj:`[sentence1, dtype=string]`, \ - :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. - - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint8]`, \ - :py:obj:`[sentence1, dtype=string]`, :py:obj:`[sentence2, dtype=string]`. - - usage = :py:obj:`eval`, output columns: :py:obj:`[sentence1, dtype=string]`, \ - :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. - - - task = :py:obj:`WSC` - - usage = :py:obj:`train`, output columns: :py:obj:`[span1_index, dtype=uint8]`, \ - :py:obj:`[span2_index, dtype=uint8]`, :py:obj:`[span1_text, dtype=string]`, \ - :py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint8]`, \ - :py:obj:`[text, dtype=string]`, :py:obj:`[label, dtype=string]`. - - usage = :py:obj:`test`, output columns: :py:obj:`[span1_index, dtype=uint8]`, \ - :py:obj:`[span2_index, dtype=uint8]`, :py:obj:`[span1_text, dtype=string]`, \ - :py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint8]`, :py:obj:`[text, dtype=string]`. - - usage = :py:obj:`eval`, output columns: :py:obj:`[span1_index, dtype=uint8]`, \ - :py:obj:`[span2_index, dtype=uint8]`, :py:obj:`[span1_text, dtype=string]`, \ - :py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint8]`, \ - :py:obj:`[text, dtype=string]`, :py:obj:`[label, dtype=string]`. - - - task = :py:obj:`CSL` - - usage = :py:obj:`train`, output columns: :py:obj:`[id, dtype=uint8]`, \ - :py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`, :py:obj:`[label, dtype=string]`. - - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint8]`, \ - :py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`. - - usage = :py:obj:`eval`, output columns: :py:obj:`[id, dtype=uint8]`, \ - :py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`, :py:obj:`[label, dtype=string]`. - - Args: - dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for - a pattern of files. The list will be sorted in a lexicographical order. - task (str, optional): The kind of task, one of `AFQMC`, `TNEWS`, `IFLYTEK`, `CMNLI`, `WSC` and `CSL`. - (default=AFQMC). - usage (str, optional): Specify the `train`, `test` or `eval` part of dataset (default="train"). - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will include all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_files are not valid or do not exist. 
- RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> clue_dataset_dir = ["/path/to/clue_dataset_file"] # contains 1 or multiple clue files - >>> dataset = ds.CLUEDataset(dataset_files=clue_dataset_dir, task='AFQMC', usage='train') - - About CLUE dataset: - - CLUE, a Chinese Language Understanding Evaluation benchmark. It contains multiple - tasks, including single-sentence classification, sentence pair classification, and machine - reading comprehension. - - You can unzip the dataset files into the following structure and read by MindSpore's API, - such as afqmc dataset: - - .. code-block:: - - . - └── afqmc_public - ├── train.json - ├── test.json - └── dev.json - - Citation: - - .. code-block:: - - @article{CLUEbenchmark, - title = {CLUE: A Chinese Language Understanding Evaluation Benchmark}, - author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li, - Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng, - Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou, - Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan}, - journal = {arXiv preprint arXiv:2004.05986}, - year = {2020}, - howpublished = {https://github.com/CLUEbenchmark/CLUE} - } - """ - - @check_cluedataset - def __init__(self, dataset_files, task='AFQMC', usage='train', num_samples=None, num_parallel_workers=None, - shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_files = self._find_files(dataset_files) - self.usage = replace_none(usage, 'train') - self.task = replace_none(task, 'AFQMC') - - def parse(self, children=None): - return cde.CLUENode(self.dataset_files, self.task, self.usage, self.num_samples, self.shuffle_flag, - self.num_shards, self.shard_id) - - -class CSVDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses comma-separated values (CSV) datasets. - The columns of generated dataset depend on the source CSV files. - - Args: - dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search - for a pattern of files. The list will be sorted in a lexicographical order. - field_delim (str, optional): A string that indicates the char delimiter to separate fields (default=','). - column_defaults (list, optional): List of default values for the CSV field (default=None). Each item - in the list is either a valid type (float, int, or string). If this is not provided, treats all - columns as string type. - column_names (list[str], optional): List of column names of the dataset (default=None). If this - is not provided, infers the column_names from the first row of CSV file. - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will include all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). 
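For the CLUEDataset block above, a minimal sketch of how the per-task columns and the file-level shuffle option come together; the file path is hypothetical and the iterator call is just one way to pull rows out, not part of this diff.

import mindspore.dataset as ds

# Hypothetical path to an unpacked AFQMC split; any of the train/dev/test JSON files can be listed here.
afqmc_files = ["/path/to/afqmc_public/train.json"]

# With task='AFQMC' and usage='train' each row carries the string columns
# [sentence1, sentence2, label]; Shuffle.FILES shuffles file order only, not rows within a file.
dataset = ds.CLUEDataset(dataset_files=afqmc_files, task='AFQMC', usage='train',
                         shuffle=ds.Shuffle.FILES)
for row in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
    print(row["sentence1"], row["label"])
    break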
- If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_files are not valid or do not exist. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> csv_dataset_dir = ["/path/to/csv_dataset_file"] # contains 1 or multiple csv files - >>> dataset = ds.CSVDataset(dataset_files=csv_dataset_dir, column_names=['col1', 'col2', 'col3', 'col4']) - """ - - @check_csvdataset - def __init__(self, dataset_files, field_delim=',', column_defaults=None, column_names=None, num_samples=None, - num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_files = self._find_files(dataset_files) - self.dataset_files.sort() - self.field_delim = replace_none(field_delim, ',') - self.column_defaults = replace_none(column_defaults, []) - self.column_names = replace_none(column_names, []) - - def parse(self, children=None): - return cde.CSVNode(self.dataset_files, self.field_delim, self.column_defaults, self.column_names, - self.num_samples, self.shuffle_flag, self.num_shards, self.shard_id) - - -class SBUDataset(MappableDataset): - """ - A source dataset for reading and parsing the SBU dataset. - - The generated dataset has two columns :py:obj:`[image, caption]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`caption` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - decode (bool, optional): Decode the images after reading (default=False). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. 
- (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter 'sampler' - - Parameter 'shuffle' - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> sbu_dataset_dir = "/path/to/sbu_dataset_directory" - >>> # Read 3 samples from SBU dataset - >>> dataset = ds.SBUDataset(dataset_dir=sbu_dataset_dir, num_samples=3) - - About SBU dataset: - - SBU dataset is a large captioned photo collection. - It contains one million images with associated visually relevant captions. - - You should manually download the images using official download.m by replacing 'urls{i}(24, end)' with - 'urls{i}(24:1:end)' and keep the directory as below. - - .. code-block:: - - . - └─ dataset_dir - ├── SBU_captioned_photo_dataset_captions.txt - ├── SBU_captioned_photo_dataset_urls.txt - └── sbu_images - ├── m_3326_3596303505_3ce4c20529.jpg - ├── ...... - └── m_2522_4182181099_c3c23ab1cc.jpg - - Citation: - - .. code-block:: - - @inproceedings{Ordonez:2011:im2text, - Author = {Vicente Ordonez and Girish Kulkarni and Tamara L. Berg}, - Title = {Im2Text: Describing Images Using 1 Million Captioned Photographs}, - Booktitle = {Neural Information Processing Systems ({NIPS})}, - Year = {2011}, - } - """ - - @check_sbu_dataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.SBUNode(self.dataset_dir, self.decode, self.sampler) - - -class SogouNewsDataset(SourceDataset): - """ - A source dataset that reads and parses Sogou News dataset. - - The generated dataset has three columns: :py:obj:`[index, title, content]`. - The tensor of column :py:obj:`index` is of the string type. - The tensor of column :py:obj:`title` is of the string type. - The tensor of column :py:obj:`content` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . - `train` will read from 450,000 train samples, `test` will read from 60,000 test samples, - `all` will read from all 510,000 samples (default=None, all samples). 
- num_samples (int, optional): Number of samples (rows) to read (default=None, read all samples). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL; - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum number of samples per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> sogou_news_dataset_dir = "/path/to/sogou_news_dataset_dir" - >>> dataset = ds.SogouNewsDataset(dataset_dir=sogou_news_dataset_dir, usage='all') - - About SogouNews Dataset: - - SogouNews dataset includes 3 columns, corresponding to class index (1 to 5), title and content. The title and - content are escaped using double quotes ("), and any internal double quote is escaped by 2 double quotes (""). - New lines are escaped by a backslash followed by an "n" character, that is "\n". - - You can unzip the dataset files into the following structure and read by MindSpore's API: - - .. code-block:: - - . - └── sogou_news_dir - ├── classes.txt - ├── readme.txt - ├── test.csv - └── train.csv - - Citation: - - .. code-block:: - - @misc{zhang2015characterlevel, - title={Character-level Convolutional Networks for Text Classification}, - author={Xiang Zhang and Junbo Zhao and Yann LeCun}, - year={2015}, - eprint={1509.01626}, - archivePrefix={arXiv}, - primaryClass={cs.LG} - } - """ - - @check_sogou_news_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, - shard_id=None, num_parallel_workers=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - - def parse(self, children=None): - return cde.SogouNewsNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, - self.num_shards, self.shard_id) - - -class _Flowers102Dataset: - """ - Mainly for loading Flowers102 Dataset, and returns one row each time.
- """ - - def __init__(self, dataset_dir, task, usage, decode): - self.dataset_dir = os.path.realpath(dataset_dir) - self.task = task - self.usage = usage - self.decode = decode - - if self.task == "Classification": - self.column_names = ["image", "label"] - else: - self.column_names = ["image", "segmentation", "label"] - - labels_path = os.path.join(self.dataset_dir, "imagelabels.mat") - setid_path = os.path.join(self.dataset_dir, "setid.mat") - # minus one to transform 1~102 to 0 ~ 101 - self.labels = (loadmat(labels_path)["labels"][0] - 1).astype(np.uint32) - self.setid = loadmat(setid_path) - - if self.usage == 'train': - self.indices = self.setid["trnid"][0].tolist() - elif self.usage == 'test': - self.indices = self.setid["tstid"][0].tolist() - elif self.usage == 'valid': - self.indices = self.setid["valid"][0].tolist() - elif self.usage == 'all': - self.indices = self.setid["trnid"][0].tolist() - self.indices += self.setid["tstid"][0].tolist() - self.indices += self.setid["valid"][0].tolist() - else: - raise ValueError("Input usage is not within the valid set of ['train', 'valid', 'test', 'all'].") - - def __getitem__(self, index): - # range: 1 ~ 8189 - image_path = os.path.join(self.dataset_dir, "jpg", "image_" + str(self.indices[index]).zfill(5) + ".jpg") - if not os.path.exists(image_path): - raise RuntimeError("Can not find image file: " + image_path) - - if self.decode is True: - image = np.asarray(Image.open(image_path).convert("RGB")) - else: - image = np.fromfile(image_path, dtype=np.uint8) - - label = self.labels[self.indices[index] - 1] - - if self.task == "Segmentation": - segmentation_path = \ - os.path.join(self.dataset_dir, "segmim", "segmim_" + str(self.indices[index]).zfill(5) + ".jpg") - if not os.path.exists(segmentation_path): - raise RuntimeError("Can not find segmentation file: " + segmentation_path) - if self.decode is True: - segmentation = np.asarray(Image.open(segmentation_path).convert("RGB")) - else: - segmentation = np.fromfile(segmentation_path, dtype=np.uint8) - return image, segmentation, label - - return image, label - - def __len__(self): - return len(self.indices) - - -class Flowers102Dataset(GeneratorDataset): - """ - A source dataset for reading and parsing Flowers102 dataset. - - The generated dataset has two columns :py:obj:`[image, label]` or three :py:obj:`[image, segmentation, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`segmentation` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar or a tensor of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - task (str): Specify the 'Classification' or 'Segmentation' task (default='Classification'). - usage (str): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset - (default='all', will read all samples). - num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). - num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. - (default=None, expected order behavior shown in the table). - decode (bool, optional): Whether or not to decode the images and segmentations after reading (default=False). - sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. 
Random accessible - input is required (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - Random accessible input is required. When this argument is specified, 'num_samples' reflects the max - sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only - when num_shards is also specified. Random accessible input is required. - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter 'sampler' - - Parameter 'shuffle' - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> flowers102_dataset_dir = "/path/to/flowers102_dataset_directory" - >>> dataset = ds.Flowers102Dataset(dataset_dir=flowers102_dataset_dir, - ... task="Classification", - ... usage="all", - ... decode=True) - - About Flowers102 dataset: - - Flowers102 dataset consists of 102 flower categories. - The flowers commonly occur in the United Kingdom. - Each class consists of between 40 and 258 images. - - Here is the original Flowers102 dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── flowes102_dataset_dir - ├── imagelabels.mat - ├── setid.mat - ├── jpg - ├── image_00001.jpg - ├── image_00002.jpg - ├── ... - ├── segmim - ├── segmim_00001.jpg - ├── segmim_00002.jpg - ├── ... - - Citation: - - .. code-block:: - - @InProceedings{Nilsback08, - author = "Maria-Elena Nilsback and Andrew Zisserman", - title = "Automated Flower Classification over a Large Number of Classes", - booktitle = "Indian Conference on Computer Vision, Graphics and Image Processing", - month = "Dec", - year = "2008", - } - """ - - @check_flowers102dataset - def __init__(self, dataset_dir, task="Classification", usage="all", num_samples=None, num_parallel_workers=1, - shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): - self.dataset_dir = os.path.realpath(dataset_dir) - self.task = replace_none(task, "Classification") - self.usage = replace_none(usage, "all") - self.decode = replace_none(decode, False) - dataset = _Flowers102Dataset(self.dataset_dir, self.task, self.usage, self.decode) - super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - def get_class_indexing(self): - """ - Get the class index. 
- - Returns: - dict, a str-to-int mapping from label name to index. - """ - class_names = [ - "pink primrose", "hard-leaved pocket orchid", "canterbury bells", - "sweet pea", "english marigold", "tiger lily", "moon orchid", - "bird of paradise", "monkshood", "globe thistle", "snapdragon", - "colt's foot", "king protea", "spear thistle", "yellow iris", - "globe-flower", "purple coneflower", "peruvian lily", "balloon flower", - "giant white arum lily", "fire lily", "pincushion flower", "fritillary", - "red ginger", "grape hyacinth", "corn poppy", "prince of wales feathers", - "stemless gentian", "artichoke", "sweet william", "carnation", - "garden phlox", "love in the mist", "mexican aster", "alpine sea holly", - "ruby-lipped cattleya", "cape flower", "great masterwort", "siam tulip", - "lenten rose", "barbeton daisy", "daffodil", "sword lily", "poinsettia", - "bolero deep blue", "wallflower", "marigold", "buttercup", "oxeye daisy", - "common dandelion", "petunia", "wild pansy", "primula", "sunflower", - "pelargonium", "bishop of llandaff", "gaura", "geranium", "orange dahlia", - "pink-yellow dahlia?", "cautleya spicata", "japanese anemone", - "black-eyed susan", "silverbush", "californian poppy", "osteospermum", - "spring crocus", "bearded iris", "windflower", "tree poppy", "gazania", - "azalea", "water lily", "rose", "thorn apple", "morning glory", - "passion flower", "lotus", "toad lily", "anthurium", "frangipani", - "clematis", "hibiscus", "columbine", "desert-rose", "tree mallow", - "magnolia", "cyclamen", "watercress", "canna lily", "hippeastrum", - "bee balm", "ball moss", "foxglove", "bougainvillea", "camellia", "mallow", - "mexican petunia", "bromelia", "blanket flower", "trumpet creeper", - "blackberry lily" - ] - - class_dict = {} - for i, class_name in enumerate(class_names): - class_dict[class_name] = i - - return class_dict - - -class LJSpeechDataset(MappableDataset): - """ - A source dataset for reading and parsing LJSpeech dataset. - - The generated dataset has four columns :py:obj:`[waveform, sample_rate, transcription, normalized_transcript]`. - The tensor of column :py:obj:`waveform` is a tensor of the float32 type. - The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. - The tensor of column :py:obj:`transcription` is a scalar of the string type. - The tensor of column :py:obj:`normalized_transcript` is a scalar of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_samples (int, optional): The number of audios to be included in the dataset - (default=None, all audios). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). 
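A small sketch of the LJSpeechDataset sharding arguments just listed, assuming a locally unpacked LJSpeech-1.1 directory (the path is hypothetical); note that `num_samples` caps each shard rather than the whole dataset.

import mindspore.dataset as ds

lj_speech_dir = "/path/to/LJSpeech-1.1"  # hypothetical path to the unpacked archive

# Two shards of the same dataset: each worker reads a disjoint subset,
# and num_samples=100 limits every shard (not their union) to 100 clips.
shard0 = ds.LJSpeechDataset(dataset_dir=lj_speech_dir, num_shards=2, shard_id=0, num_samples=100)
shard1 = ds.LJSpeechDataset(dataset_dir=lj_speech_dir, num_shards=2, shard_id=1, num_samples=100)
print(shard0.get_dataset_size(), shard1.get_dataset_size())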
- - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> lj_speech_dataset_dir = "/path/to/lj_speech_dataset_directory" - >>> - >>> # 1) Get all samples from LJSPEECH dataset in sequence - >>> dataset = ds.LJSpeechDataset(dataset_dir=lj_speech_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from LJSPEECH dataset - >>> dataset = ds.LJSpeechDataset(dataset_dir=lj_speech_dataset_dir, num_samples=350, shuffle=True) - >>> - >>> # 3) Get samples from LJSPEECH dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.LJSpeechDataset(dataset_dir=lj_speech_dataset_dir, num_shards=2, shard_id=0) - >>> - >>> # In LJSPEECH dataset, each dictionary has keys "waveform", "sample_rate", "transcription" - >>> # and "normalized_transcript" - - About LJSPEECH dataset: - - This is a public domain speech dataset consisting of 13,100 short audio clips of a single speaker - reading passages from 7 non-fiction books. A transcription is provided for each clip. - Clips vary in length from 1 to 10 seconds and have a total length of approximately 24 hours. - - The texts were published between 1884 and 1964, and are in the public domain. - The audio was recorded in 2016-17 by the LibriVox project and is also in the public domain. - - Here is the original LJSPEECH dataset structure. - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── LJSpeech-1.1 - ├── README - ├── metadata.csv - └── wavs - ├── LJ001-0001.wav - ├── LJ001-0002.wav - ├── LJ001-0003.wav - ├── LJ001-0004.wav - ├── LJ001-0005.wav - ├── LJ001-0006.wav - ├── LJ001-0007.wav - ├── LJ001-0008.wav - ... - ├── LJ050-0277.wav - └── LJ050-0278.wav - - Citation: - - .. 
code-block:: - - @misc{lj_speech17, - author = {Keith Ito and Linda Johnson}, - title = {The LJ Speech Dataset}, - howpublished = {url{https://keithito.com/LJ-Speech-Dataset}}, - year = 2017 - } - """ - - @check_lj_speech_dataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - - def parse(self, children=None): - return cde.LJSpeechNode(self.dataset_dir, self.sampler) - - -class TextFileDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses datasets stored on disk in text format. - The generated dataset has one column :py:obj:`[text]` with type string. - - Args: - dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a - pattern of files. The list will be sorted in a lexicographical order. - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will include all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_files are not valid or do not exist. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files - >>> dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir) - """ - - @check_textfiledataset - def __init__(self, dataset_files, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_files = self._find_files(dataset_files) - self.dataset_files.sort() - - def parse(self, children=None): - return cde.TextFileNode(self.dataset_files, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class _NumpySlicesDataset: - """ - Mainly for dealing with several kinds of formats of Python data, and return one row each time. 
- """ - - def __init__(self, data, column_list=None): - self.column_list = None - # Convert dict data into tuple - if isinstance(data, dict): - data = self.process_dict(data) - - if isinstance(data, tuple): - self.data = () - data_len = len(data) - for i in range(data_len): - self.data = self.data + (np.array(data[i]),) - else: - self.data = (np.array(data),) - - # check whether the data length in each column is equal - data_len = [len(data_item) for data_item in self.data] - if data_len[1:] != data_len[:-1]: - raise ValueError("Data length in each column is not equal.") - - # Init column_name - if column_list is not None: - self.column_list = column_list - elif self.column_list is None: - self.column_list = [] - column_num = len(self.data) - for i in range(column_num): - self.column_list.append("column_" + str(i)) - - def __getitem__(self, index): - data_row = [d[index, ...] for d in self.data] - data_res = tuple(data_row) - return data_res - - def __len__(self): - return len(self.data[0]) - - def process_dict(self, input_data): - """ - Convert the dict like data into tuple format, when input is a tuple of dicts then compose it into a dict first. - """ - # Convert pandas like dict(has "values" column) into General dict - data_keys = list(input_data.keys()) - data_col = input_data[data_keys[0]] - if hasattr(data_col, "values"): - new_dict = {} - for key in data_keys: - item1 = input_data.pop(key) - new_dict[key] = item1.values - input_data = new_dict - - # Convert the data in dict into tuple - data = () - keys = list(input_data.keys()) - self.column_list = keys - for key in keys: - value = input_data[key] - data = data + (list(value),) - - return data - - -class NumpySlicesDataset(GeneratorDataset): - """ - Creates a dataset with given data slices, mainly for loading Python data into dataset. - - The column names and column types of generated dataset depend on Python data defined by users. - - Args: - data (Union[list, tuple, dict]) Input of given data. Supported data types include: list, tuple, dict and other - NumPy formats. Input data will be sliced along the first dimension and generate additional rows, if input is - list, there will be one column in each row, otherwise there tends to be multi columns. Large data is not - recommended to be loaded in this way as data is loading into memory. - column_names (list[str], optional): List of column names of the dataset (default=None). If column_names is not - provided, the output column names will be named as the keys of dict when the input data is a dict, - otherwise they will be named like column_0, column_1 ... - num_samples (int, optional): The number of samples to be included in the dataset (default=None, all samples). - num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. - (default=None, expected order behavior shown in the table). - sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible - input is required (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - Random accessible input is required. When this argument is specified, `num_samples` reflects the max - sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). 
This argument must be specified only - when num_shards is also specified. Random accessible input is required. - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Raises: - RuntimeError: If len of column_names does not match output len of data. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Examples: - >>> # 1) Input data can be a list - >>> data = [1, 2, 3] - >>> dataset = ds.NumpySlicesDataset(data=data, column_names=["column_1"]) - >>> - >>> # 2) Input data can be a dictionary, and column_names will be its keys - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> dataset = ds.NumpySlicesDataset(data=data) - >>> - >>> # 3) Input data can be a tuple of lists (or NumPy arrays), each tuple element refers to data in each column - >>> data = ([1, 2], [3, 4], [5, 6]) - >>> dataset = ds.NumpySlicesDataset(data=data, column_names=["column_1", "column_2", "column_3"]) - >>> - >>> # 4) Load data from CSV file - >>> import pandas as pd - >>> df = pd.read_csv(filepath_or_buffer=csv_dataset_dir[0]) - >>> dataset = ds.NumpySlicesDataset(data=dict(df), shuffle=False) - """ - - @check_numpyslicesdataset - def __init__(self, data, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None, - num_shards=None, shard_id=None): - dataset = _NumpySlicesDataset(data, column_names) - super().__init__(dataset, column_names=dataset.column_list, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - -class _PaddedDataset: - """ - Mainly for combining false samples provided by users into a dataset. - - Args: - padded_samples (list(dict)): Data provided by user to be added to the initial Dataset. - """ - - def __init__(self, padded_samples): - self.column_names = list(padded_samples[0].keys()) - self.padded_samples = padded_samples - - def __getitem__(self, item): - return (self.padded_samples[item][key] for key in self.column_names) - - def __len__(self): - return len(self.padded_samples) - - -class PaddedDataset(GeneratorDataset): - """ - Creates a dataset with filler data provided by user. Mainly used to add to the original data set - and assign it to the corresponding shard. - - Args: - padded_samples (list(dict)): Samples provided by user. - - Raises: - TypeError: If padded_samples is not an instance of list. - TypeError: If the element of padded_samples is not an instance of dict. - ValueError: If the padded_samples is empty. 
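The usual reason to build such filler rows is to make a dataset divide evenly across shards. A minimal sketch of that concat-then-shard flow follows; the column name and row counts are illustrative assumptions, not taken from this diff.

import numpy as np
import mindspore.dataset as ds

# 10 real rows plus 2 filler rows so the total of 12 splits evenly across 4 shards.
real = ds.NumpySlicesDataset({"image": np.ones((10, 8), np.uint8)}, shuffle=False)
filler = ds.PaddedDataset([{"image": np.zeros(8, np.uint8)} for _ in range(2)])

combined = real + filler                              # concatenate real data and filler rows
combined.use_sampler(ds.DistributedSampler(num_shards=4, shard_id=0, shuffle=False))
print(combined.get_dataset_size())                    # 3 rows on this shard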
- - Examples: - >>> import numpy as np - >>> data = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(2, np.uint8)}] - >>> dataset = ds.PaddedDataset(padded_samples=data) - """ - - @check_paddeddataset - def __init__(self, padded_samples): - dataset = _PaddedDataset(padded_samples) - super().__init__(dataset, column_names=dataset.column_names, num_shards=None, shard_id=None, shuffle=False) - self._dataset_size = len(dataset.padded_samples) - self.padded_samples = padded_samples - - -class EMnistDataset(MappableDataset): - """ - A source dataset for reading and parsing the EMNIST dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - name (str): Name of splits for this dataset, can be "byclass", "bymerge", "balanced", "letters", "digits" - or "mnist". - usage (str, optional): Usage of this dataset, can be "train", "test" or "all". - (default=None, will read all samples). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> emnist_dataset_dir = "/path/to/emnist_dataset_directory" - >>> - >>> # Read 3 samples from EMNIST dataset - >>> dataset = ds.EMnistDataset(dataset_dir=emnist_dataset_dir, name="mnist", num_samples=3) - >>> - >>> # Note: In emnist_dataset dataset, each dictionary has keys "image" and "label" - - About EMNIST dataset: - - The EMNIST dataset is a set of handwritten character digits derived from the NIST Special - Database 19 and converted to a 28x28 pixel image format and dataset structure that directly - matches the MNIST dataset. Further information on the dataset contents and conversion process - can be found in the paper available at https://arxiv.org/abs/1702.05373v1. - - The numbers of characters and classes of each split of EMNIST are as follows: - - By Class: 814,255 characters and 62 unbalanced classes. - By Merge: 814,255 characters and 47 unbalanced classes. - Balanced: 131,600 characters and 47 balanced classes. - Letters: 145,600 characters and 26 balanced classes. - Digits: 280,000 characters and 10 balanced classes. - MNIST: 70,000 characters and 10 balanced classes. - - Here is the original EMNIST dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── mnist_dataset_dir - ├── emnist-mnist-train-images-idx3-ubyte - ├── emnist-mnist-train-labels-idx1-ubyte - ├── emnist-mnist-test-images-idx3-ubyte - ├── emnist-mnist-test-labels-idx1-ubyte - ├── ... - - Citation: - - .. code-block:: - - @article{cohen_afshar_tapson_schaik_2017, - title = {EMNIST: Extending MNIST to handwritten letters}, - DOI = {10.1109/ijcnn.2017.7966217}, - journal = {2017 International Joint Conference on Neural Networks (IJCNN)}, - author = {Cohen, Gregory and Afshar, Saeed and Tapson, Jonathan and Schaik, Andre Van}, - year = {2017}, - howpublished = {https://www.westernsydney.edu.au/icns/reproducible_research/ - publication_support_materials/emnist} - } - """ - - @check_emnist_dataset - def __init__(self, dataset_dir, name, usage=None, num_samples=None, num_parallel_workers=None, - shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.name = name - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.EMnistNode(self.dataset_dir, self.name, self.usage, self.sampler) - - -class FakeImageDataset(MappableDataset): - """ - A source dataset for generating fake images. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. - - Args: - num_images (int, optional): Number of images to generate in the dataset (default=1000). - image_size (tuple, optional): Size of the fake image (default=(224, 224, 3)). 
- num_classes (int, optional): Number of classes in the dataset (default=10). - base_seed (int, optional): Offsets the index-based random seed used to generate each image (default=0). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This - argument can only be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter 'sampler' - - Parameter 'shuffle' - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> # Read 3 samples from FakeImage dataset - >>> dataset = ds.FakeImageDataset(num_images=1000, image_size=(224,224,3), - ... num_classes=10, base_seed=0, num_samples=3) - >>> - >>> # Note: In FakeImage dataset, each dictionary has keys "image" and "label" - """ - - @check_fake_image_dataset - def __init__(self, num_images=1000, image_size=(224, 224, 3), num_classes=10, base_seed=0, num_samples=None, - num_parallel_workers=None, shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.num_images = num_images - self.image_size = image_size - self.num_classes = num_classes - self.base_seed = base_seed - - def parse(self, children=None): - return cde.FakeImageNode(self.num_images, self.image_size, self.num_classes, self.base_seed, self.sampler) - - -class FlickrDataset(MappableDataset): - """ - A source dataset for reading and parsing Flickr8k and Flickr30k dataset. - - The generated dataset has two columns :py:obj:`[image, annotation]`. - The tensor of column :py:obj:`image` is of the uint8 type. 
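For FakeImageDataset above, a minimal smoke-test sketch: no files on disk are needed, images are generated from `base_seed`, and the batch size and image size here are arbitrary choices for illustration.

import mindspore.dataset as ds

dataset = ds.FakeImageDataset(num_images=64, image_size=(32, 32, 3), num_classes=4, base_seed=0)
dataset = dataset.batch(16, drop_remainder=True)
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
    print(batch["image"].shape, batch["label"].shape)   # (16, 32, 32, 3) (16,)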
- The tensor of column :py:obj:`annotation` is a tensor which contains 5 annotations string, - such as ["a", "b", "c", "d", "e"]. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - annotation_file (str): Path to the root directory that contains the annotation. - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir is not valid or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir is not exist. - ValueError: If annotation_file is not exist. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> flickr_dataset_dir = "/path/to/flickr_dataset_directory" - >>> annotation_file = "/path/to/flickr_annotation_file" - >>> - >>> # 1) Get all samples from FLICKR dataset in sequence - >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, - ... annotation_file=annotation_file, - ... shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from FLICKR dataset - >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, - ... annotation_file=annotation_file, - ... num_samples=350, - ... shuffle=True) - >>> - >>> # 3) Get samples from FLICKR dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, - ... annotation_file=annotation_file, - ... num_shards=2, - ... shard_id=0) - >>> - >>> # In FLICKR dataset, each dictionary has keys "image" and "annotation" - - About Flickr8k dataset: - - The Flickr8k dataset consists of 8092 colour images. 
There are 40460 annotations in the Flickr8k.token.txt, - each image has 5 annotations. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── Flickr8k - ├── Flickr8k_Dataset - │ ├── 1000268201_693b08cb0e.jpg - │ ├── 1001773457_577c3a7d70.jpg - │ ├── ... - └── Flickr8k.token.txt - - Citation: - - .. code-block:: - - @article{DBLP:journals/jair/HodoshYH13, - author = {Micah Hodosh and Peter Young and Julia Hockenmaier}, - title = {Framing Image Description as a Ranking Task: Data, Models and Evaluation Metrics}, - journal = {J. Artif. Intell. Res.}, - volume = {47}, - pages = {853--899}, - year = {2013}, - url = {https://doi.org/10.1613/jair.3994}, - doi = {10.1613/jair.3994}, - timestamp = {Mon, 21 Jan 2019 15:01:17 +0100}, - biburl = {https://dblp.org/rec/journals/jair/HodoshYH13.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} - } - - About Flickr30k dataset: - - The Flickr30k dataset consists of 31783 colour images. There are 158915 annotations in - the results_20130124.token, each image has 5 annotations. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - Citation: - - .. code-block:: - - . - └── Flickr30k - ├── flickr30k-images - │ ├── 1000092795.jpg - │ ├── 10002456.jpg - │ ├── ... - └── results_20130124.token - - .. code-block:: - - @article{DBLP:journals/tacl/YoungLHH14, - author = {Peter Young and Alice Lai and Micah Hodosh and Julia Hockenmaier}, - title = {From image descriptions to visual denotations: New similarity metrics - for semantic inference over event descriptions}, - journal = {Trans. Assoc. Comput. Linguistics}, - volume = {2}, - pages = {67--78}, - year = {2014}, - url = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/229}, - timestamp = {Wed, 17 Feb 2021 21:55:25 +0100}, - biburl = {https://dblp.org/rec/journals/tacl/YoungLHH14.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} - } - """ - - @check_flickr_dataset - def __init__(self, dataset_dir, annotation_file, num_samples=None, num_parallel_workers=None, shuffle=None, - decode=None, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.annotation_file = annotation_file - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.FlickrNode(self.dataset_dir, self.annotation_file, self.decode, self.sampler) - - -class SBDataset(GeneratorDataset): - """ - A source dataset for reading and parsing Semantic Boundaries Dataset. - - The generated dataset has two columns: :py:obj:`[image, task]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`task` contains 20 images of the uint8 type if `task` is `Boundaries` otherwise - contains 1 image of the uint8 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - task (str, optional): Acceptable tasks include `Boundaries` or `Segmentation` (default= `Boundaries`). - usage (str, optional): Acceptable usages include `train`, `val`, `train_noval` and `all` (default= `all`). - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). 
- num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - - Raises: - RuntimeError: If dataset_dir is not valid or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir does not exist. - ValueError: If task is not in [`Boundaries`, `Segmentation`]. - ValueError: If usage is not in [`train`, `val`, `train_noval`, `all`]. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> sb_dataset_dir = "/path/to/sb_dataset_directory" - >>> - >>> # 1) Get all samples from Semantic Boundaries Dataset in sequence - >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from Semantic Boundaries Dataset - >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, num_samples=350, shuffle=True) - >>> - >>> # 3) Get samples from Semantic Boundaries Dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, num_shards=2, shard_id=0) - >>> - >>> # In Semantic Boundaries Dataset, each dictionary has keys "image" and "task" - - About Semantic Boundaries Dataset: - - The Semantic Boundaries Dataset consists of 11355 colour images. There are 8498 image names in train.txt, - 2857 image names in val.txt and 5623 image names in train_noval.txt. The cls/ directory contains the - category-level Segmentation and Boundaries results, and the inst/ directory contains the instance-level - Segmentation and Boundaries results. - - You can unzip the dataset files into the following structure and read them with MindSpore's API: - - .. code-block:: - - . - └── benchmark_RELEASE - ├── dataset - ├── img - │ ├── 2008_000002.jpg - │ ├── 2008_000003.jpg - │ ├── ... - ├── cls - │ ├── 2008_000002.mat - │ ├── 2008_000003.mat - │ ├── ... - ├── inst - │ ├── 2008_000002.mat - │ ├── 2008_000003.mat - │ ├── ... - ├── train.txt - └── val.txt - - .. 
code-block:: - - @InProceedings{BharathICCV2011, - author = "Bharath Hariharan and Pablo Arbelaez and Lubomir Bourdev and - Subhransu Maji and Jitendra Malik", - title = "Semantic Contours from Inverse Detectors", - booktitle = "International Conference on Computer Vision (ICCV)", - year = "2011", - """ - - @check_sb_dataset - def __init__(self, dataset_dir, task='Boundaries', usage='all', num_samples=None, num_parallel_workers=1, - shuffle=None, decode=None, sampler=None, num_shards=None, shard_id=None): - dataset = _SBDataset(dataset_dir, task, usage, decode) - super().__init__(dataset, column_names=dataset.column_list, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - -class _SBDataset: - """ - Dealing with the data file with .mat extension, and return one row in tuple (image, task) each time. - """ - - def __init__(self, dataset_dir, task, usage, decode): - self.column_list = ['image', 'task'] - self.task = task - self.images_path = os.path.join(dataset_dir, 'img') - self.cls_path = os.path.join(dataset_dir, 'cls') - self._loadmat = loadmat - self.categories = 20 - self.decode = replace_none(decode, False) - - if usage == "all": - image_names = [] - for item in ["train", "val"]: - usage_path = os.path.join(dataset_dir, item + '.txt') - if not os.path.exists(usage_path): - raise FileNotFoundError("SBDataset: {0} not found".format(usage_path)) - with open(usage_path, 'r') as f: - image_names += [x.strip() for x in f.readlines()] - else: - usage_path = os.path.join(dataset_dir, usage + '.txt') - if not os.path.exists(usage_path): - raise FileNotFoundError("SBDataset: {0} not found".format(usage_path)) - with open(usage_path, 'r') as f: - image_names = [x.strip() for x in f.readlines()] - - self.images = [os.path.join(self.images_path, i + ".jpg") for i in image_names] - self.clss = [os.path.join(self.cls_path, i + ".mat") for i in image_names] - - if len(self.images) != len(self.clss): - raise ValueError("SBDataset: images count not equal to cls count") - - self._get_data = self._get_boundaries_data if self.task == "Boundaries" else self._get_segmentation_data - self._get_item = self._get_decode_item if self.decode else self._get_undecode_item - - def _get_boundaries_data(self, mat_path): - mat_data = self._loadmat(mat_path) - return np.concatenate([np.expand_dims(mat_data['GTcls'][0][self.task][0][i][0].toarray(), axis=0) - for i in range(self.categories)], axis=0) - - def _get_segmentation_data(self, mat_path): - mat_data = self._loadmat(mat_path) - return Image.fromarray(mat_data['GTcls'][0][self.task][0]) - - def _get_decode_item(self, idx): - return Image.open(self.images[idx]).convert('RGB'), self._get_data(self.clss[idx]) - - def _get_undecode_item(self, idx): - return np.fromfile(self.images[idx], dtype=np.uint8), self._get_data(self.clss[idx]) - - def __len__(self): - return len(self.images) - - def __getitem__(self, idx): - return self._get_item(idx) - - -class SpeechCommandsDataset(MappableDataset): - """ - A source dataset for reading and parsing the SpeechCommands dataset. - - The generated dataset has five columns :py:obj:`[waveform, sample_rate, label, speaker_id, utterance_number]`. - The tensor of column :py:obj:`waveform` is a vector of the float32 type. - The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. - The tensor of column :py:obj:`label` is a scalar of the string type. 
- The tensor of column :py:obj:`speaker_id` is a scalar of the string type. - The tensor of column :py:obj:`utterance_number` is a scalar of the int32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` - will read from 84,843 samples, `test` will read from 11,005 samples, `valid` will read from 9,981 - validation samples and `all` will read from all 105,829 samples (default=None, will read all samples). - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will read all samples). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the dataset - (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only be specified - when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> speech_commands_dataset_dir = "/path/to/speech_commands_dataset_directory" - >>> - >>> # Read 3 samples from SpeechCommands dataset - >>> dataset = ds.SpeechCommandsDataset(dataset_dir=speech_commands_dataset_dir, num_samples=3) - >>> - >>> # Note: In SpeechCommands dataset, each dictionary has keys "waveform", "sample_rate", "label", - >>> # "speaker_id" and "utterance_number". - - About SpeechCommands dataset: - - SpeechCommands is a database for limited-vocabulary speech recognition, containing 105,829 audio samples in - '.wav' format. - - Here is the original SpeechCommands dataset structure. - You can unzip the dataset files into this directory structure and read them with MindSpore's API. - - .. code-block:: - - . - └── speech_commands_dataset_dir - ├── cat - ├── b433eff_nohash_0.wav - ├── 5a33edf_nohash_1.wav - └──....
- ├── dog - ├── b433w2w_nohash_0.wav - └──.... - ├── four - └── .... - - Citation: - - .. code-block:: - @article{2018Speech, - title={Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition}, - author={Warden, P.}, - year={2018} - } - """ - - @check_speech_commands_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.SpeechCommandsNode(self.dataset_dir, self.usage, self.sampler) - - class DeserializedDataset(Dataset): def __init__(self, input_obj): super().__init__() @@ -9357,1629 +3555,3 @@ class DeserializedDataset(Dataset): json_str = json.dumps(self.input_obj) return cde.Dataset.from_json_string(json_str) return cde.Dataset.from_json_file(self.input_obj) - - -class CityscapesDataset(MappableDataset): - """ - A source dataset for reading and parsing Cityscapes dataset. - - The generated dataset has two columns :py:obj:`[image, task]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`task` is of the uint8 type if task is not 'polygon' otherwise task is - a string tensor with serialize json. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str): Acceptable usages include `train`, `test`, `val` or `all` if quality_mode is `fine` - otherwise `train`, `train_extra`, `val` or `all` (default= `train`). - quality_mode (str): Acceptable quality_modes include `fine` or `coarse` (default= `fine`). - task (str): Acceptable tasks include `instance`, `semantic`, `polygon` or `color` (default= `instance`). - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir is invalid or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir is not exist. - ValueError: If task is invalid. 
- ValueError: If quality_mode is invalid. - ValueError: If usage is invalid. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> cityscapes_dataset_dir = "/path/to/cityscapes_dataset_directory" - >>> - >>> # 1) Get all samples from Cityscapes dataset in sequence - >>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, task="instance", quality_mode="fine", - ... usage="train", shuffle=False, num_parallel_workers=1) - >>> - >>> # 2) Randomly select 350 samples from Cityscapes dataset - >>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, num_samples=350, shuffle=True, - ... num_parallel_workers=1) - >>> - >>> # 3) Get samples from Cityscapes dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, num_shards=2, shard_id=0, - ... num_parallel_workers=1) - >>> - >>> # In Cityscapes dataset, each dictionary has keys "image" and "task" - - About Cityscapes dataset: - - The Cityscapes dataset consists of 5000 colour images with high quality dense pixel annotations and - 19998 colour images with coarser polygonal annotations in 50 cities. There are 30 classes in this - dataset and the polygonal annotations include dense semantic segmentation and instance segmentation - for vehicle and people. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - Taking the quality_mode of `fine` as an example. - - .. code-block:: - - . - └── Cityscapes - ├── leftImg8bit - | ├── train - | | ├── aachen - | | | ├── aachen_000000_000019_leftImg8bit.png - | | | ├── aachen_000001_000019_leftImg8bit.png - | | | ├── ... - | | ├── bochum - | | | ├── ... - | | ├── ... - | ├── test - | | ├── ... - | ├── val - | | ├── ... - └── gtFine - ├── train - | ├── aachen - | | ├── aachen_000000_000019_gtFine_color.png - | | ├── aachen_000000_000019_gtFine_instanceIds.png - | | ├── aachen_000000_000019_gtFine_labelIds.png - | | ├── aachen_000000_000019_gtFine_polygons.json - | | ├── aachen_000001_000019_gtFine_color.png - | | ├── aachen_000001_000019_gtFine_instanceIds.png - | | ├── aachen_000001_000019_gtFine_labelIds.png - | | ├── aachen_000001_000019_gtFine_polygons.json - | | ├── ... - | ├── bochum - | | ├── ... - | ├── ... - ├── test - | ├── ... - └── val - ├── ... - - Citation: - - .. code-block:: - - @inproceedings{Cordts2016Cityscapes, - title = {The Cityscapes Dataset for Semantic Urban Scene Understanding}, - author = {Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, - Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, - booktitle = {Proc. 
of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year = {2016} - } - """ - - @check_cityscapes_dataset - def __init__(self, dataset_dir, usage="train", quality_mode="fine", task="instance", num_samples=None, - num_parallel_workers=None, shuffle=None, decode=None, sampler=None, num_shards=None, - shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.task = task - self.quality_mode = quality_mode - self.usage = usage - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.CityscapesNode(self.dataset_dir, self.usage, self.quality_mode, self.task, self.decode, self.sampler) - - -class DBpediaDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses the DBpedia dataset. - - The generated dataset has three columns :py:obj:`[class, title, content]`. - The tensor of column :py:obj:`class` is of the string type. - The tensor of column :py:obj:`title` is of the string type. - The tensor of column :py:obj:`content` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. - `train` will read from 560,000 train samples, - `test` will read from 70,000 test samples, - `all` will read from all 630,000 samples (default=None, all samples). - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will include all text). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL; - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). 
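The `num_shards` and `shard_id` arguments documented above behave the same way across these dataset classes. A minimal sketch of that behaviour, using an in-memory ``NumpySlicesDataset`` as a stand-in (``DBpediaDataset`` itself needs the CSV files described below), might look like this:

.. code-block:: python

    import numpy as np
    import mindspore.dataset as ds

    rows = np.arange(8, dtype=np.int32)  # stand-in for 8 text samples

    # Each shard sees a disjoint, roughly equal subset of the rows.
    shard0 = ds.NumpySlicesDataset(rows, column_names=["class"], shuffle=False,
                                   num_shards=2, shard_id=0)
    shard1 = ds.NumpySlicesDataset(rows, column_names=["class"], shuffle=False,
                                   num_shards=2, shard_id=1)

    print(shard0.get_dataset_size(), shard1.get_dataset_size())  # 4 4

This pair of arguments is roughly what ``DistributedSampler`` provides when it is passed as `sampler` instead, which is why specifying both a sampler and sharding is rejected.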
- - Examples: - >>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory" - >>> - >>> # 1) Read 3 samples from DBpedia dataset - >>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dataset_dir, num_samples=3) - >>> - >>> # 2) Read train samples from DBpedia dataset - >>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dataset_dir, usage="train") - - About DBpedia dataset: - - The DBpedia dataset consists of 630,000 text samples in 14 classes: there are 560,000 samples in the train.csv - and 70,000 samples in the test.csv. - The 14 different classes represent Company, EducationalInstitution, Artist, Athlete, OfficeHolder, - MeanOfTransportation, Building, NaturalPlace, Village, Animal, Plant, Album, Film, WrittenWork. - - Here is the original DBpedia dataset structure. - You can unzip the dataset files into this directory structure and read them with MindSpore's API. - - .. code-block:: - - . - └── dbpedia_dataset_dir - ├── train.csv - ├── test.csv - ├── classes.txt - └── readme.txt - - .. code-block:: - - @article{DBpedia, - title = {DBPedia Ontology Classification Dataset}, - author = {Jens Lehmann, Robert Isele, Max Jakob, Anja Jentzsch, Dimitris Kontokostas, - Pablo N. Mendes, Sebastian Hellmann, Mohamed Morsey, Patrick van Kleef, - Sören Auer, Christian Bizer}, - year = {2015}, - howpublished = {http://dbpedia.org} - } - """ - - @check_dbpedia_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.DBpediaNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class DIV2KDataset(MappableDataset): - """ - A source dataset for reading and parsing the DIV2K dataset. - - The generated dataset has two columns :py:obj:`[hr_image, lr_image]`. - The tensor of column :py:obj:`hr_image` is of the uint8 type. - The tensor of column :py:obj:`lr_image` is of the uint8 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str): Acceptable usages include `train`, `valid` or `all` (default= `train`). - downgrade (str): Acceptable downgrades include `bicubic`, `unknown`, `mild`, `difficult` or - `wild` (default= `bicubic`). - scale (int): Acceptable scales include 2, 3, 4 or 8 (default=2). - When `downgrade` is `bicubic`, scale can be 2, 3, 4, 8. - When `downgrade` is `unknown`, scale can only be 2, 3, 4. - When `downgrade` is `mild`, `difficult` or `wild`, scale can only be 4. - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None).
When this argument is specified, `num_samples` reflects - the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir is invalid or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If dataset_dir is not exist. - ValueError: If usage is invalid. - ValueError: If downgrade is invalid. - ValueError: If scale is invalid. - ValueError: If scale equal to 8 and downgrade not equal to `bicubic`. - ValueError: If downgrade in [`mild`, `difficult`, `wild`] and scale not equal to 4. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> div2k_dataset_dir = "/path/to/div2k_dataset_directory" - >>> - >>> # 1) Get all samples from DIV2K dataset in sequence - >>> dataset = ds.DIV2KDataset(dataset_dir=div2k_dataset_dir, usage="train", scale=2, downgrade="bicubic", - ... shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from DIV2K dataset - >>> dataset = ds.DIV2KDataset(dataset_dir=div2k_dataset_dir, usage="train", scale=2, downgrade="bicubic", - ... num_samples=350, shuffle=True) - >>> - >>> # 3) Get samples from DIV2K dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.DIV2KDataset(dataset_dir=div2k_dataset_dir, usage="train", scale=2, downgrade="bicubic", - ... num_shards=2, shard_id=0) - >>> - >>> # In DIV2K dataset, each dictionary has keys "hr_image" and "lr_image" - - About DIV2K dataset: - - The DIV2K dataset consists of 1000 2K resolution images, among which 800 images are for training, 100 images - are for validation and 100 images are for testing. NTIRE 2017 and NTIRE 2018 include only training dataset - and validation dataset. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - Take the training set as an example. - - .. code-block:: - - . - └── DIV2K - ├── DIV2K_train_HR - | ├── 0001.png - | ├── 0002.png - | ├── ... - ├── DIV2K_train_LR_bicubic - | ├── X2 - | | ├── 0001x2.png - | | ├── 0002x2.png - | | ├── ... - | ├── X3 - | | ├── 0001x3.png - | | ├── 0002x3.png - | | ├── ... - | └── X4 - | ├── 0001x4.png - | ├── 0002x4.png - | ├── ... - ├── DIV2K_train_LR_unknown - | ├── X2 - | | ├── 0001x2.png - | | ├── 0002x2.png - | | ├── ... - | ├── X3 - | | ├── 0001x3.png - | | ├── 0002x3.png - | | ├── ... 
- | └── X4 - | ├── 0001x4.png - | ├── 0002x4.png - | ├── ... - ├── DIV2K_train_LR_mild - | ├── 0001x4m.png - | ├── 0002x4m.png - | ├── ... - ├── DIV2K_train_LR_difficult - | ├── 0001x4d.png - | ├── 0002x4d.png - | ├── ... - ├── DIV2K_train_LR_wild - | ├── 0001x4w.png - | ├── 0002x4w.png - | ├── ... - └── DIV2K_train_LR_x8 - ├── 0001x8.png - ├── 0002x8.png - ├── ... - Citation: - - .. code-block:: - - @InProceedings{Agustsson_2017_CVPR_Workshops, - author = {Agustsson, Eirikur and Timofte, Radu}, - title = {NTIRE 2017 Challenge on Single Image Super-Resolution: Dataset and Study}, - booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, - url = "http://www.vision.ee.ethz.ch/~timofter/publications/Agustsson-CVPRW-2017.pdf", - month = {July}, - year = {2017} - } - """ - - @check_div2k_dataset - def __init__(self, dataset_dir, usage="train", downgrade="bicubic", scale=2, num_samples=None, - num_parallel_workers=None, shuffle=None, decode=None, sampler=None, num_shards=None, - shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = usage - self.scale = scale - self.downgrade = downgrade - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.DIV2KNode(self.dataset_dir, self.usage, self.downgrade, self.scale, self.decode, self.sampler) - - -class WIDERFaceDataset(MappableDataset): - """ - A source dataset for reading and parsing WIDERFace dataset. - - When usage is "train", "valid" or "all", the generated dataset has eight columns ["image", "bbox", "blur", - "expression", "illumination", "occlusion", "pose", "invalid"]. When usage is "test", it only has one column - ["image"]. - The tensor of column :py:obj:`image` is a vector of the uint8 type. - The tensor of column :py:obj:`bbox` is a scalar of the uint32 type. - The tensor of column :py:obj:`blur` is a scalar of the uint32 type. - The tensor of column :py:obj:`expression` is a scalar of the uint32 type. - The tensor of column :py:obj:`illumination` is a scalar of the uint32 type. - The tensor of column :py:obj:`occlusion` is a scalar of the uint32 type. - The tensor of column :py:obj:`pose` is a scalar of the uint32 type. - The tensor of column :py:obj:`invalid` is a scalar of the uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` will read - from 12,880 samples, `test` will read from 16,097 samples, `valid` will read from 3,226 test samples - and `all` will read all `train` and `valid` samples (default=None, will be set to `all`). - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). - decode (bool, optional): Decode the images after reading (default=False). - sampler (Sampler, optional): Object used to choose samples from the dataset - (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). 
- When this argument is specified, `num_samples` reflects the maximum sample number per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only be specified - when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If usage is not in [`train`, `test`, `valid`, `all`]. - ValueError: If dataset_dir does not exist. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> wider_face_dir = "/path/to/wider_face_dataset" - >>> - >>> # Read 3 samples from WIDERFace dataset - >>> dataset = ds.WIDERFaceDataset(dataset_dir=wider_face_dir, num_samples=3) - - About WIDERFace dataset: - - The WIDERFace database of face images has a training set of 12,880 samples, a testing set of 16,097 samples - and a validation set of 3,226 samples. It is a subset of a larger set available from WIDER. - - The following is the original WIDERFace dataset structure. - You can unzip the dataset files into this directory structure and read them with MindSpore's API. - - .. code-block:: - - . - └── wider_face_dir - ├── WIDER_test - │ └── images - │ ├── 0--Parade - │ │ ├── 0_Parade_marchingband_1_9.jpg - │ │ ├── ... - │ ├──1--Handshaking - │ ├──... - ├── WIDER_train - │ └── images - │ ├── 0--Parade - │ │ ├── 0_Parade_marchingband_1_11.jpg - │ │ ├── ... - │ ├──1--Handshaking - │ ├──... - ├── WIDER_val - │ └── images - │ ├── 0--Parade - │ │ ├── 0_Parade_marchingband_1_102.jpg - │ │ ├── ... - │ ├──1--Handshaking - │ ├──... - └── wider_face_split - ├── wider_face_test_filelist.txt - ├── wider_face_train_bbx_gt.txt - └── wider_face_val_bbx_gt.txt - - Citation: - - .. code-block:: - - @inproceedings{2016WIDER, - title={WIDER FACE: A Face Detection Benchmark}, - author={Yang, S. and Luo, P. and Loy, C. C.
and Tang, X.}, - booktitle={IEEE}, - pages={5525-5533}, - year={2016}, - } - """ - - @check_wider_face_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - decode=False, sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - self.decode = replace_none(decode, False) - - def parse(self, children=None): - return cde.WIDERFaceNode(self.dataset_dir, self.usage, self.decode, self.sampler) - - -class YelpReviewDataset(SourceDataset, TextBaseDataset): - """ - A source dataset that reads and parses Yelp Review Polarity and Yelp Review Full dataset. - - The generated dataset has two columns: :py:obj:`[label, text]`. - The tensor of column :py:obj:`label` is of the string type. - The tensor of column :py:obj:`text` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. - For Polarity, `train` will read from 560,000 train samples, `test` will read from 38,000 test samples, - `all` will read from all 598,000 samples. - For Full, `train` will read from 650,000 train samples, `test` will read from 50,000 test samples, - `all` will read from all 700,000 samples (default=None, all samples). - num_samples (int, optional): Number of samples (rows) to read (default=None, reads all samples). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - - Examples: - >>> yelp_review_dataset_dir = "/path/to/yelp_review_dataset_dir" - >>> dataset = ds.YelpReviewDataset(dataset_dir=yelp_review_dataset_dir, usage='all') - - About YelpReview Dataset: - - The Yelp Review Full dataset consists of reviews from Yelp. It is extracted from the Yelp Dataset Challenge 2015 - data, and it is mainly used for text classification. - - The Yelp Review Polarity dataset is constructed from the above dataset, by considering stars 1 and 2 negative, and 3 - and 4 positive. - - The directory structures of these two datasets are the same. 
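The `Shuffle.GLOBAL` and `Shuffle.FILES` levels listed in the Args above are shared by the file-based text readers. A rough, self-contained sketch using ``TextFileDataset`` over two throw-away files (the file names and contents here are purely illustrative and are not part of the Yelp Review layout shown next) could be:

.. code-block:: python

    import os
    import tempfile
    import mindspore.dataset as ds

    # Create two small throw-away text files to read from.
    tmp_dir = tempfile.mkdtemp()
    files = []
    for i in range(2):
        path = os.path.join(tmp_dir, "part{}.txt".format(i))
        with open(path, "w") as f:
            f.write("row_a_of_file_{}\nrow_b_of_file_{}\n".format(i, i))
        files.append(path)

    # Shuffle.FILES: only the file order is shuffled; rows inside a file keep their order.
    file_level = ds.TextFileDataset(files, shuffle=ds.Shuffle.FILES)
    # Shuffle.GLOBAL (the default): both the files and the rows are shuffled.
    global_level = ds.TextFileDataset(files, shuffle=ds.Shuffle.GLOBAL)

    for row in file_level.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(row["text"])

`Shuffle.FILES` only randomizes file order, which is cheaper but gives a weaker shuffle than `Shuffle.GLOBAL`.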
- You can unzip the dataset files into the following structure and read by MindSpore's API: - - .. code-block:: - - . - └── yelp_review_dir - ├── train.csv - ├── test.csv - └── readme.txt - - Citation: - - For Yelp Review Polarity: - - .. code-block:: - - @article{zhangCharacterlevelConvolutionalNetworks2015, - archivePrefix = {arXiv}, - eprinttype = {arxiv}, - eprint = {1509.01626}, - primaryClass = {cs}, - title = {Character-Level {{Convolutional Networks}} for {{Text Classification}}}, - abstract = {This article offers an empirical exploration on the use of character-level convolutional networks - (ConvNets) for text classification. We constructed several large-scale datasets to show that - character-level convolutional networks could achieve state-of-the-art or competitive results. - Comparisons are offered against traditional models such as bag of words, n-grams and their TFIDF - variants, and deep learning models such as word-based ConvNets and recurrent neural networks.}, - journal = {arXiv:1509.01626 [cs]}, - author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - month = sep, - year = {2015}, - } - - Citation: - - For Yelp Review Full: - - .. code-block:: - - @article{zhangCharacterlevelConvolutionalNetworks2015, - archivePrefix = {arXiv}, - eprinttype = {arxiv}, - eprint = {1509.01626}, - primaryClass = {cs}, - title = {Character-Level {{Convolutional Networks}} for {{Text Classification}}}, - abstract = {This article offers an empirical exploration on the use of character-level convolutional networks - (ConvNets) for text classification. We constructed several large-scale datasets to show that - character-level convolutional networks could achieve state-of-the-art or competitive results. - Comparisons are offered against traditional models such as bag of words, n-grams and their TFIDF - variants, and deep learning models such as word-based ConvNets and recurrent neural networks.}, - journal = {arXiv:1509.01626 [cs]}, - author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, - month = sep, - year = {2015}, - } - """ - - @check_yelp_review_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, - shard_id=None, num_parallel_workers=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, 'all') - - def parse(self, children=None): - return cde.YelpReviewNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, - self.num_shards, self.shard_id) - - -class YesNoDataset(MappableDataset): - """ - A source dataset for reading and parsing the YesNo dataset. - - The generated dataset has three columns :py:obj:`[waveform, sample_rate, labels]`. - The tensor of column :py:obj:`waveform` is a vector of the float32 type. - The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. - The tensor of column :py:obj:`labels` is a scalar of the int32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_samples (int, optional): The number of images to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, will use value set in the config). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset - (default=None, expected order behavior shown in the table). 
- sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only - be specified when `num_shards` is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> yes_no_dataset_dir = "/path/to/yes_no_dataset_directory" - >>> - >>> # Read 3 samples from YesNo dataset - >>> dataset = ds.YesNoDataset(dataset_dir=yes_no_dataset_dir, num_samples=3) - >>> - >>> # Note: In YesNo dataset, each dictionary has keys "waveform", "sample_rate", "label" - - About YesNo dataset: - - Yesno is an audio dataset consisting of 60 recordings of one individual saying yes or no in Hebrew; each - recording is eight words long. It was created for the Kaldi audio project by an author who wishes to - remain anonymous. - - Here is the original YesNo dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── yes_no_dataset_dir - ├── 1_1_0_0_1_1_0_0.wav - ├── 1_0_0_0_1_1_0_0.wav - ├── 1_1_0_0_1_1_0_0.wav - └──.... - - Citation: - - .. code-block:: - - @NetworkResource{Kaldi_audio_project, - author = {anonymous}, - url = "http://wwww.openslr.org/1/" - } - """ - - @check_yes_no_dataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - - def parse(self, children=None): - return cde.YesNoNode(self.dataset_dir, self.sampler) - - -class SemeionDataset(MappableDataset): - """ - A source dataset for reading and parsing Semeion dataset. - - The generated dataset has two columns :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is a scalar of the uint32 type. 
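A quick, hedged illustration of how a two-column ``[image, label]`` dataset like this is typically consumed: the random arrays below are only stand-ins for the real semeion.data records, and ``NumpySlicesDataset`` is used so the snippet runs without the dataset files.

.. code-block:: python

    import numpy as np
    import mindspore.dataset as ds

    # Random 16x16 binary "digits" and labels as placeholders for real records.
    fake_images = np.random.randint(0, 2, size=(4, 16, 16)).astype(np.uint8)
    fake_labels = np.arange(4, dtype=np.uint32)

    dataset = ds.NumpySlicesDataset((fake_images, fake_labels),
                                    column_names=["image", "label"], shuffle=False)

    print(dataset.get_col_names())  # ['image', 'label']
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(row["image"].shape, row["label"])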
- - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will read all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> semeion_dataset_dir = "/path/to/semeion_dataset_directory" - >>> - >>> # 1) Get all samples from SEMEION dataset in sequence - >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 10 samples from SEMEION dataset - >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, num_samples=10, shuffle=True) - >>> - >>> # 3) Get samples from SEMEION dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, num_shards=2, shard_id=0) - >>> - >>> # In SEMEION dataset, each dictionary has keys: image, label. - - About SEMEION dataset: - - The dataset was created by Tactile Srl, Brescia, Italy (http://www.tattile.it) and donated in 1994 - to Semeion Research Center of Sciences of Communication, Rome, Italy (http://www.semeion.it), - for machine learning research. - - This dataset consists of 1593 records (rows) and 256 attributes (columns). Each record represents - a handwritten digit, originally scanned with a resolution of 256 grey scale. Each pixel of the each - original scanned image was first stretched, and after scaled between 0 and 1 - (setting to 0 every pixel whose value was under the value 127 of the grey scale (127 included) - and setting to 1 each pixel whose original value in the grey scale was over 127). 
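As a small NumPy illustration of the thresholding rule just described (the grey-scale values below are made up): levels of 127 or lower map to 0 and levels above 127 map to 1.

.. code-block:: python

    import numpy as np

    grey = np.array([0, 64, 127, 128, 200, 255], dtype=np.uint8)
    binary = (grey > 127).astype(np.uint8)
    print(binary)  # [0 0 0 1 1 1]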
Finally, each binary image - was scaled again into a 16x16 square box (the final 256 binary attributes). - - .. code-block:: - - . - └── semeion_dataset_dir - └──semeion.data - └──semeion.names - - Citation: - - .. code-block:: - - @article{ - title={The Theory of Independent Judges, in Substance Use & Misuse 33(2)1998, pp 439-461}, - author={M Buscema, MetaNet}, - } - """ - - @check_semeion_dataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - - def parse(self, children=None): - return cde.SemeionNode(self.dataset_dir, self.sampler) - - -class TedliumDataset(MappableDataset): - """ - A source dataset for reading and parsing Tedlium dataset. - The columns of generated dataset depend on the source SPH files and the corresponding STM files. - - The generated dataset has six columns :py:obj:`[waveform, sample_rate, transcript, talk_id, speaker_id, - identifier]`. - - The tensor of column :py:obj:`waveform` is of the float32 type. - The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. - The tensor of column :py:obj:`transcript` is a scalar of the string type. - The tensor of column :py:obj:`talk_id` is a scalar of the string type. - The tensor of column :py:obj:`speaker_id` is a scalar of the string type. - The tensor of column :py:obj:`identifier` is a scalar of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - release (str): Release of the dataset, can be "release1", "release2", "release3". - usage (str, optional): Usage of this dataset. - For release1 or release2, can be `train`, `test`, ` dev` or `all`. - `train` will read from train samples, - `test` will read from test samples, - `dev` will read from dev samples, - `all` will read from all samples. - For release3, can only be "all", it will read from data samples (default=None, all samples). - extensions (str): Extensions of the SPH files, only '.sph' is valid. - (default=None, ".sph"). - num_samples (int, optional): The number of audio samples to be included in the dataset - (default=None, all samples). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, `num_samples` reflects - the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain stm files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. 
- RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter `sampler` - - Parameter `shuffle` - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> # 1) Get all train samples from TEDLIUM_release1 dataset in sequence. - >>> dataset = ds.TedliumDataset(dataset_dir="/path/to/tedlium1_dataset_directory", - ... release="release1", shuffle=False) - >>> - >>> # 2) Randomly select 10 samples from TEDLIUM_release2 dataset. - >>> dataset = ds.TedliumDataset(dataset_dir="/path/to/tedlium2_dataset_directory", - ... release="release2", num_samples=10, shuffle=True) - >>> - >>> # 3) Get samples from TEDLIUM_release-3 dataset for shard 0 in a 2-way distributed training. - >>> dataset = ds.TedliumDataset(dataset_dir="/path/to/tedlium3_dataset_directory", - ... release="release3", num_shards=2, shard_id=0) - >>> - >>> # In TEDLIUM dataset, each dictionary has keys : waveform, sample_rate, transcript, talk_id, - >>> # speaker_id and identifier. - - About TEDLIUM_release1 dataset: - - The TED-LIUM corpus is English-language TED talks, with transcriptions, sampled at 16kHz. - It contains about 118 hours of speech. - - About TEDLIUM_release2 dataset: - - This is the TED-LIUM corpus release 2, licensed under Creative Commons BY-NC-ND 3.0. All talks and text are - property of TED Conferences LLC. The TED-LIUM corpus was made from audio talks and their transcriptions available - on the TED website. We have prepared and filtered these data in order to train acoustic models to participate to - the International Workshop on Spoken Language Translation 2011 (the LIUM English/French SLT system reached the - first rank in the SLT task). - - About TEDLIUM_release-3 dataset: - - This is the TED-LIUM corpus release 3, licensed under Creative Commons BY-NC-ND 3.0. All talks and text are - property of TED Conferences LLC. This new TED-LIUM release was made through a collaboration between the Ubiqus - company and the LIUM (University of Le Mans, France). - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - The structure of TEDLIUM release2 is the same as TEDLIUM release1, only the data is different. - - .. code-block:: - - . - └──TEDLIUM_release1 - └── dev - ├── sph - ├── AlGore_2009.sph - ├── BarrySchwartz_2005G.sph - ├── stm - ├── AlGore_2009.stm - ├── BarrySchwartz_2005G.stm - └── test - ├── sph - ├── AimeeMullins_2009P.sph - ├── BillGates_2010.sph - ├── stm - ├── AimeeMullins_2009P.stm - ├── BillGates_2010.stm - └── train - ├── sph - ├── AaronHuey_2010X.sph - ├── AdamGrosser_2007.sph - ├── stm - ├── AaronHuey_2010X.stm - ├── AdamGrosser_2007.stm - └── readme - └── TEDLIUM.150k.dic - - .. code-block:: - - . 
- └──TEDLIUM_release-3 - └── data - ├── ctl - ├── sph - ├── 911Mothers_2010W.sph - ├── AalaElKhani.sph - ├── stm - ├── 911Mothers_2010W.stm - ├── AalaElKhani.stm - └── doc - └── legacy - └── LM - └── speaker-adaptation - └── readme - └── TEDLIUM.150k.dic - - Citation: - - .. code-block:: - - @article{ - title={TED-LIUM: an automatic speech recognition dedicated corpus}, - author={A. Rousseau, P. Deléglise, Y. Estève}, - journal={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}, - year={May 2012}, - biburl={https://www.openslr.org/7/} - } - - @article{ - title={Enhancing the TED-LIUM Corpus with Selected Data for Language Modeling and More TED Talks}, - author={A. Rousseau, P. Deléglise, and Y. Estève}, - journal={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}, - year={May 2014}, - biburl={https://www.openslr.org/19/} - } - - @article{ - title={TED-LIUM 3: twice as much data and corpus repartition for experiments on speaker adaptation}, - author={François Hernandez, Vincent Nguyen, Sahar Ghannay, Natalia Tomashenko, and Yannick Estève}, - journal={the 20th International Conference on Speech and Computer (SPECOM 2018)}, - year={September 2018}, - biburl={https://www.openslr.org/51/} - } - """ - - @check_tedlium_dataset - def __init__(self, dataset_dir, release, usage=None, extensions=None, num_samples=None, - num_parallel_workers=None, shuffle=None, sampler=None, num_shards=None, - shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.extensions = replace_none(extensions, ".sph") - self.release = release - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.TedliumNode(self.dataset_dir, self.release, self.usage, self.extensions, self.sampler) - - -class _SVHNDataset: - """ - Mainly for loading SVHN Dataset, and return two rows each time. - """ - - def __init__(self, dataset_dir, usage): - self.dataset_dir = os.path.realpath(dataset_dir) - self.usage = usage - self.column_names = ["image", "label"] - self.usage_all = ["train", "test", "extra"] - self.data = np.array([], dtype=np.uint8) - self.labels = np.array([], dtype=np.uint32) - - if self.usage == "all": - for _usage in self.usage_all: - data, label = self._load_mat(_usage) - self.data = np.concatenate((self.data, data)) if self.data.size else data - self.labels = np.concatenate((self.labels, label)) if self.labels.size else label - else: - self.data, self.labels = self._load_mat(self.usage) - - def _load_mat(self, mode): - filename = mode + "_32x32.mat" - mat_data = loadmat(os.path.join(self.dataset_dir, filename)) - data = np.transpose(mat_data['X'], [3, 0, 1, 2]) - label = mat_data['y'].astype(np.uint32).squeeze() - np.place(label, label == 10, 0) - return data, label - - def __getitem__(self, index): - return self.data[index], self.labels[index] - - def __len__(self): - return len(self.data) - - -class SVHNDataset(GeneratorDataset): - """ - A source dataset for reading and parsing SVHN dataset. - - The generated dataset has two columns: :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is of a scalar of uint32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. 
- usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of dataset - (default=None, will read all samples). - num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). - num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). - shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. - (default=None, expected order behavior shown in the table). - sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible - input is required (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - Random accessible input is required. When this argument is specified, 'num_samples' reflects the max - sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only - when num_shards is also specified. Random accessible input is required. - - Raises: - RuntimeError: If dataset_dir is not valid or does not exist or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If usage is invalid. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter 'sampler' - - Parameter 'shuffle' - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> svhn_dataset_dir = "/path/to/svhn_dataset_directory" - >>> dataset = ds.SVHNDataset(dataset_dir=svhn_dataset_dir, usage="train") - - About SVHN dataset: - - SVHN dataset consists of 10 digit classes. - SVHN is obtained from house numbers in Google Street View images. - 73257 digits for training, 26032 digits for testing, and 531131 additional extra training data. - - Here is the original SVHN dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── svhn_dataset_dir - ├── train_32x32.mat - ├── test_32x32.mat - └── extra_32x32.mat - - Citation: - - .. code-block:: - - @article{ - title={Reading Digits in Natural Images with Unsupervised Feature Learning}, - author={Yuval Netzer, Tao Wang, Adam Coates, Alessandro Bissacco, Bo Wu, Andrew Y. 
Ng}, - conference={NIPS Workshop on Deep Learning and Unsupervised Feature Learning 2011.}, - year={2011}, - publisher={NIPS} - url={http://ufldl.stanford.edu/housenumbers} - } - - """ - - @check_svhn_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=1, shuffle=None, - sampler=None, num_shards=None, shard_id=None): - self.dataset_dir = os.path.realpath(dataset_dir) - self.usage = replace_none(usage, "all") - dataset = _SVHNDataset(self.dataset_dir, self.usage) - - super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, - num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, - num_shards=num_shards, shard_id=shard_id) - - -class STL10Dataset(MappableDataset): - """ - A source dataset for reading and parsing STL10 dataset. - - The generated dataset has two columns: :py:obj:`[image, label]`. - The tensor of column :py:obj:`image` is of the uint8 type. - The tensor of column :py:obj:`label` is of a scalar of int32 type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be "train", "test", - "unlabeled", "train+unlabeled" or "all" . "train" will read from 5,000 - train samples, "test" will read from 8,000 test samples, - "unlabeled" will read from all 100,000 samples, and "train+unlabeled" - will read from 105000 samples, "all" will read all the samples - (default=None, all samples). - num_samples (int, optional): The number of images to be included in the dataset. - (default=None, all images). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected - order behavior shown in the table). - sampler (Sampler, optional): Object used to choose samples from the - dataset (default=None, expected order behavior shown in the table). - num_shards (int, optional): Number of shards that the dataset will be divided - into (default=None). When this argument is specified, 'num_samples' reflects - the max sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir is not valid or does not exist or does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If sampler and shuffle are specified at the same time. - RuntimeError: If sampler and sharding are specified at the same time. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If usage is invalid. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Note: - - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. - The table below shows what input arguments are allowed and their expected behavior. - - .. 
list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' - :widths: 25 25 50 - :header-rows: 1 - - * - Parameter 'sampler' - - Parameter 'shuffle' - - Expected Order Behavior - * - None - - None - - random order - * - None - - True - - random order - * - None - - False - - sequential order - * - Sampler object - - None - - order defined by sampler - * - Sampler object - - True - - not allowed - * - Sampler object - - False - - not allowed - - Examples: - >>> stl10_dataset_dir = "/path/to/stl10_dataset_directory" - >>> - >>> # 1) Get all samples from STL10 dataset in sequence - >>> dataset = ds.STL10Dataset(dataset_dir=stl10_dataset_dir, shuffle=False) - >>> - >>> # 2) Randomly select 350 samples from STL10 dataset - >>> dataset = ds.STL10Dataset(dataset_dir=stl10_dataset_dir, num_samples=350, shuffle=True) - >>> - >>> # 3) Get samples from STL10 dataset for shard 0 in a 2-way distributed training - >>> dataset = ds.STL10Dataset(dataset_dir=stl10_dataset_dir, num_shards=2, shard_id=0) - - About STL10 dataset: - - STL10 dataset consists of 10 classes: airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. - STL10 is is inspired by the CIFAR-10 dataset. - Images are 96x96 pixels, color. - 500 training images, 800 test images per class and 100000 unlabeled images. - Labels are 0-indexed, and unlabeled images have -1 as their labels. - - Here is the original STL10 dataset structure. - You can unzip the dataset files into this directory structure and read by MindSpore's API. - - .. code-block:: - . - └── stl10_dataset_dir - ├── train_X.bin - ├── train_y.bin - ├── test_X.bin - ├── test_y.bin - └── unlabeled_X.bin - - Citation of STL10 dataset. - - .. code-block:: - - @techreport{Coates10, - author = {Adam Coates}, - title = {Learning multiple layers of features from tiny images}, - year = {20010}, - howpublished = {https://cs.stanford.edu/~acoates/stl10/}, - description = {The STL-10 dataset consists of 96x96 RGB images in 10 classes, - with 500 training images and 800 testing images per class. - There are 5000 training images and 8000 test images. - It also has 100000 unlabeled images for unsupervised learning. - These examples are extracted from a similar but broader distribution of images. - } - } - """ - - @check_stl10_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, - sampler=None, num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, - shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) - - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.STL10Node(self.dataset_dir, self.usage, self.sampler) - - -class EnWik9Dataset(SourceDataset): - """ - A source dataset that reads and parses EnWik9 dataset. - - The generated dataset has one column :py:obj:`[text]` with type string. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will include all samples). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). - shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=True). 
- If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Examples: - >>> en_wik9_dataset_dir = "/path/to/en_wik9_dataset" - >>> dataset2 = ds.EnWik9Dataset(dataset_dir=en_wik9_dataset_dir, num_samples=2, - ... shuffle=True) - - About EnWik9 dataset: - - The data of EnWik9 is UTF-8 encoded XML consisting primarily of English text. It contains 243,426 article titles, - of which 85,560 are #REDIRECT to fix broken links, and the rest are regular articles. - - The data is UTF-8 clean. All characters are in the range U'0000 to U'10FFFF with valid encodings of 1 to - 4 bytes. The byte values 0xC0, 0xC1, and 0xF5-0xFF never occur. Also, in the Wikipedia dumps, - there are no control characters in the range 0x00-0x1F except for 0x09 (tab) and 0x0A (linefeed). - Linebreaks occur only on paragraph boundaries, so they always have a semantic purpose. - - You can unzip the dataset files into the following directory structure and read by MindSpore's API. - - .. code-block:: - - . - └── EnWik9 - ├── enwik9 - - Citation: - - .. code-block:: - - @NetworkResource{Hutter_prize, - author = {English Wikipedia}, - url = "https://cs.fit.edu/~mmahoney/compression/textdata.html", - month = {March}, - year = {2006} - } - """ - - @check_en_wik9_dataset - def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=True, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - - def parse(self, children=None): - return cde.EnWik9Node(self.dataset_dir, self.num_samples, self.shuffle_flag, self.num_shards, - self.shard_id) - - -class YahooAnswersDataset(SourceDataset): - """ - A source dataset that reads and parses the YahooAnswers dataset. - - The generated dataset has three columns :py:obj:`[class, title, content, answer]`. - The tensor of column :py:obj:`class` is of the string type. - The tensor of column :py:obj:`title` is of the string type. - The tensor of column :py:obj:`content` is of the string type. - The tensor of column :py:obj:`answer` is of the string type. - - Args: - dataset_dir (str): Path to the root directory that contains the dataset. - usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read - from 1,400,000 train samples, `test` will read from 60,000 test samples, `all` will read from - all 1,460,000 samples (default=None, all samples). - num_samples (int, optional): The number of samples to be included in the dataset - (default=None, will include all text). - num_parallel_workers (int, optional): Number of workers to read the data - (default=None, number set in the config). 
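For file-based text sources such as EnWik9Dataset and YahooAnswersDataset, `shuffle` accepts either a bool or a Shuffle level, as described below. A minimal sketch, assuming a hypothetical dataset path:

.. code-block:: python

    import mindspore.dataset as ds

    data_dir = "/path/to/yahoo_answers_dataset_directory"  # hypothetical path
    # Shuffle both the file order and the rows (the default for most text sources).
    ds_global = ds.YahooAnswersDataset(dataset_dir=data_dir, usage="train", shuffle=ds.Shuffle.GLOBAL)
    # Shuffle the file order only, keeping rows within each file in order.
    ds_files = ds.YahooAnswersDataset(dataset_dir=data_dir, usage="train", shuffle=ds.Shuffle.FILES)
    # Disable shuffling entirely.
    ds_plain = ds.YahooAnswersDataset(dataset_dir=data_dir, usage="train", shuffle=False)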
- shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch - (default=Shuffle.GLOBAL). - If shuffle is False, no shuffling will be performed; - If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL - Otherwise, there are two levels of shuffling: - - - Shuffle.GLOBAL: Shuffle both the files and samples. - - - Shuffle.FILES: Shuffle files only. - - num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). - When this argument is specified, `num_samples` reflects the maximum sample number of per shard. - shard_id (int, optional): The shard ID within num_shards (default=None). This - argument can only be specified when num_shards is also specified. - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing - (default=None, which means no cache is used). - - Raises: - RuntimeError: If dataset_dir does not contain data files. - RuntimeError: If num_parallel_workers exceeds the max thread numbers. - RuntimeError: If num_shards is specified but shard_id is None. - RuntimeError: If shard_id is specified but num_shards is None. - ValueError: If shard_id is invalid (< 0 or >= num_shards). - - Examples: - >>> yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory" - >>> - >>> # 1) Read 3 samples from YahooAnswers dataset - >>> dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, num_samples=3) - >>> - >>> # 2) Read train samples from YahooAnswers dataset - >>> dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, usage="train") - - About YahooAnswers dataset: - - The YahooAnswers dataset consists of 630,000 text samples in 14 classes, - There are 560,000 samples in the train.csv and 70,000 samples in the test.csv. - The 10 different classes represent Society & Culture, Science & Mathematics, Health, Education & Reference, - Computers & Internet, Sports, Business & Finance, Entertainment & Music, Family & Relationships, - Politics & Government. - - Here is the original YahooAnswers dataset structure. - You can unzip the dataset files into this directory structure and read by Mindspore's API. - - .. code-block:: - - . - └── yahoo_answers_dataset_dir - ├── train.csv - ├── test.csv - ├── classes.txt - └── readme.txt - - .. code-block:: - - @article{YahooAnswers, - title = {Yahoo! 
Answers Topic Classification Dataset}, - author = {Xiang Zhang}, - year = {2015}, - howpublished = {} - } - """ - - @check_yahoo_answers_dataset - def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, - num_shards=None, shard_id=None, cache=None): - super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, - num_shards=num_shards, shard_id=shard_id, cache=cache) - self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "all") - - def parse(self, children=None): - return cde.YahooAnswersNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, - self.num_shards, self.shard_id) diff --git a/mindspore/python/mindspore/dataset/engine/datasets_audio.py b/mindspore/python/mindspore/dataset/engine/datasets_audio.py new file mode 100644 index 00000000000..c87a266569a --- /dev/null +++ b/mindspore/python/mindspore/dataset/engine/datasets_audio.py @@ -0,0 +1,612 @@ +# Copyright 2019-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +This dataset module supports various formats of datasets, including ImageNet, TFData, +MNIST, Cifar10/100, Manifest, MindRecord, and more. This module loads data with +high performance and parses data precisely. Some of the operations that are +provided to users to preprocess data include shuffle, batch, repeat, map, and zip. +""" +import mindspore._c_dataengine as cde + +from .datasets import MappableDataset +from .validators import check_lj_speech_dataset, check_yes_no_dataset, check_speech_commands_dataset, \ + check_tedlium_dataset + +from ..core.validator_helpers import replace_none + + +class LJSpeechDataset(MappableDataset): + """ + A source dataset for reading and parsing LJSpeech dataset. + + The generated dataset has four columns :py:obj:`[waveform, sample_rate, transcription, normalized_transcript]`. + The tensor of column :py:obj:`waveform` is a tensor of the float32 type. + The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. + The tensor of column :py:obj:`transcription` is a scalar of the string type. + The tensor of column :py:obj:`normalized_transcript` is a scalar of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_samples (int, optional): The number of audios to be included in the dataset + (default=None, all audios). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). 
When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> lj_speech_dataset_dir = "/path/to/lj_speech_dataset_directory" + >>> + >>> # 1) Get all samples from LJSPEECH dataset in sequence + >>> dataset = ds.LJSpeechDataset(dataset_dir=lj_speech_dataset_dir, shuffle=False) + >>> + >>> # 2) Randomly select 350 samples from LJSPEECH dataset + >>> dataset = ds.LJSpeechDataset(dataset_dir=lj_speech_dataset_dir, num_samples=350, shuffle=True) + >>> + >>> # 3) Get samples from LJSPEECH dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.LJSpeechDataset(dataset_dir=lj_speech_dataset_dir, num_shards=2, shard_id=0) + >>> + >>> # In LJSPEECH dataset, each dictionary has keys "waveform", "sample_rate", "transcription" + >>> # and "normalized_transcript" + + About LJSPEECH dataset: + + This is a public domain speech dataset consisting of 13,100 short audio clips of a single speaker + reading passages from 7 non-fiction books. A transcription is provided for each clip. + Clips vary in length from 1 to 10 seconds and have a total length of approximately 24 hours. + + The texts were published between 1884 and 1964, and are in the public domain. + The audio was recorded in 2016-17 by the LibriVox project and is also in the public domain. + + Here is the original LJSPEECH dataset structure. + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── LJSpeech-1.1 + ├── README + ├── metadata.csv + └── wavs + ├── LJ001-0001.wav + ├── LJ001-0002.wav + ├── LJ001-0003.wav + ├── LJ001-0004.wav + ├── LJ001-0005.wav + ├── LJ001-0006.wav + ├── LJ001-0007.wav + ├── LJ001-0008.wav + ... + ├── LJ050-0277.wav + └── LJ050-0278.wav + + Citation: + + .. 
code-block:: + + @misc{lj_speech17, + author = {Keith Ito and Linda Johnson}, + title = {The LJ Speech Dataset}, + howpublished = {url{https://keithito.com/LJ-Speech-Dataset}}, + year = 2017 + } + """ + + @check_lj_speech_dataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + + def parse(self, children=None): + return cde.LJSpeechNode(self.dataset_dir, self.sampler) + + +class SpeechCommandsDataset(MappableDataset): + """ + A source dataset for reading and parsing the SpeechCommands dataset. + + The generated dataset has five columns :py:obj:`[waveform, sample_rate, label, speaker_id, utterance_number]`. + The tensor of column :py:obj:`waveform` is a vector of the float32 type. + The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. + The tensor of column :py:obj:`label` is a scalar of the string type. + The tensor of column :py:obj:`speaker_id` is a scalar of the string type. + The tensor of column :py:obj:`utterance_number` is a scalar of the int32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` + will read from 84,843 samples, `test` will read from 11,005 samples, `valid` will read from 9,981 + test samples and `all` will read from all 105,829 samples (default=None, will read all samples). + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, will read all samples). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only be specified + when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> speech_commands_dataset_dir = "/path/to/speech_commands_dataset_directory" + >>> + >>> # Read 3 samples from SpeechCommands dataset + >>> dataset = ds.SpeechCommandsDataset(dataset_dir=speech_commands_dataset_dir, num_samples=3) + >>> + >>> # Note: In SpeechCommands dataset, each dictionary has keys "waveform", "sample_rate", "label", + >>> # "speaker_id" and "utterance_number". + + About SpeechCommands dataset: + + The SpeechCommands is database for limited_vocabulary speech recognition, containing 105,829 audio samples of + '.wav' format. + + Here is the original SpeechCommands dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── speech_commands_dataset_dir + ├── cat + ├── b433eff_nohash_0.wav + ├── 5a33edf_nohash_1.wav + └──.... + ├── dog + ├── b433w2w_nohash_0.wav + └──.... + ├── four + └── .... + + Citation: + + .. code-block:: + @article{2018Speech, + title={Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition}, + author={Warden, P.}, + year={2018} + } + """ + + @check_speech_commands_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.SpeechCommandsNode(self.dataset_dir, self.usage, self.sampler) + + +class TedliumDataset(MappableDataset): + """ + A source dataset for reading and parsing Tedlium dataset. + The columns of generated dataset depend on the source SPH files and the corresponding STM files. + + The generated dataset has six columns :py:obj:`[waveform, sample_rate, transcript, talk_id, speaker_id, + identifier]`. + + The tensor of column :py:obj:`waveform` is of the float32 type. + The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. + The tensor of column :py:obj:`transcript` is a scalar of the string type. + The tensor of column :py:obj:`talk_id` is a scalar of the string type. + The tensor of column :py:obj:`speaker_id` is a scalar of the string type. + The tensor of column :py:obj:`identifier` is a scalar of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + release (str): Release of the dataset, can be "release1", "release2", "release3". + usage (str, optional): Usage of this dataset. + For release1 or release2, can be `train`, `test`, ` dev` or `all`. + `train` will read from train samples, + `test` will read from test samples, + `dev` will read from dev samples, + `all` will read from all samples. + For release3, can only be "all", it will read from data samples (default=None, all samples). + extensions (str): Extensions of the SPH files, only '.sph' is valid. + (default=None, ".sph"). 
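Rows from these audio sources come back as dictionaries, so per-clip metadata such as duration can be derived directly from the `waveform` and `sample_rate` columns. A short sketch for SpeechCommandsDataset, assuming a hypothetical path:

.. code-block:: python

    import mindspore.dataset as ds

    dataset = ds.SpeechCommandsDataset(dataset_dir="/path/to/speech_commands_dataset_directory",
                                       usage="train", num_samples=3)
    for row in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
        # waveform is a float32 vector; sample_rate is an int32 scalar.
        duration_s = row["waveform"].shape[-1] / row["sample_rate"]
        print(row["label"], round(float(duration_s), 2))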
+ num_samples (int, optional): The number of audio samples to be included in the dataset + (default=None, all samples). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain stm files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> # 1) Get all train samples from TEDLIUM_release1 dataset in sequence. + >>> dataset = ds.TedliumDataset(dataset_dir="/path/to/tedlium1_dataset_directory", + ... release="release1", shuffle=False) + >>> + >>> # 2) Randomly select 10 samples from TEDLIUM_release2 dataset. + >>> dataset = ds.TedliumDataset(dataset_dir="/path/to/tedlium2_dataset_directory", + ... release="release2", num_samples=10, shuffle=True) + >>> + >>> # 3) Get samples from TEDLIUM_release-3 dataset for shard 0 in a 2-way distributed training. + >>> dataset = ds.TedliumDataset(dataset_dir="/path/to/tedlium3_dataset_directory", + ... release="release3", num_shards=2, shard_id=0) + >>> + >>> # In TEDLIUM dataset, each dictionary has keys : waveform, sample_rate, transcript, talk_id, + >>> # speaker_id and identifier. + + About TEDLIUM_release1 dataset: + + The TED-LIUM corpus is English-language TED talks, with transcriptions, sampled at 16kHz. + It contains about 118 hours of speech. + + About TEDLIUM_release2 dataset: + + This is the TED-LIUM corpus release 2, licensed under Creative Commons BY-NC-ND 3.0. All talks and text are + property of TED Conferences LLC. The TED-LIUM corpus was made from audio talks and their transcriptions available + on the TED website. 
We have prepared and filtered these data in order to train acoustic models to participate to + the International Workshop on Spoken Language Translation 2011 (the LIUM English/French SLT system reached the + first rank in the SLT task). + + About TEDLIUM_release-3 dataset: + + This is the TED-LIUM corpus release 3, licensed under Creative Commons BY-NC-ND 3.0. All talks and text are + property of TED Conferences LLC. This new TED-LIUM release was made through a collaboration between the Ubiqus + company and the LIUM (University of Le Mans, France). + + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + The structure of TEDLIUM release2 is the same as TEDLIUM release1, only the data is different. + + .. code-block:: + + . + └──TEDLIUM_release1 + └── dev + ├── sph + ├── AlGore_2009.sph + ├── BarrySchwartz_2005G.sph + ├── stm + ├── AlGore_2009.stm + ├── BarrySchwartz_2005G.stm + └── test + ├── sph + ├── AimeeMullins_2009P.sph + ├── BillGates_2010.sph + ├── stm + ├── AimeeMullins_2009P.stm + ├── BillGates_2010.stm + └── train + ├── sph + ├── AaronHuey_2010X.sph + ├── AdamGrosser_2007.sph + ├── stm + ├── AaronHuey_2010X.stm + ├── AdamGrosser_2007.stm + └── readme + └── TEDLIUM.150k.dic + + .. code-block:: + + . + └──TEDLIUM_release-3 + └── data + ├── ctl + ├── sph + ├── 911Mothers_2010W.sph + ├── AalaElKhani.sph + ├── stm + ├── 911Mothers_2010W.stm + ├── AalaElKhani.stm + └── doc + └── legacy + └── LM + └── speaker-adaptation + └── readme + └── TEDLIUM.150k.dic + + Citation: + + .. code-block:: + + @article{ + title={TED-LIUM: an automatic speech recognition dedicated corpus}, + author={A. Rousseau, P. Deléglise, Y. Estève}, + journal={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}, + year={May 2012}, + biburl={https://www.openslr.org/7/} + } + + @article{ + title={Enhancing the TED-LIUM Corpus with Selected Data for Language Modeling and More TED Talks}, + author={A. Rousseau, P. Deléglise, and Y. Estève}, + journal={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)}, + year={May 2014}, + biburl={https://www.openslr.org/19/} + } + + @article{ + title={TED-LIUM 3: twice as much data and corpus repartition for experiments on speaker adaptation}, + author={François Hernandez, Vincent Nguyen, Sahar Ghannay, Natalia Tomashenko, and Yannick Estève}, + journal={the 20th International Conference on Speech and Computer (SPECOM 2018)}, + year={September 2018}, + biburl={https://www.openslr.org/51/} + } + """ + + @check_tedlium_dataset + def __init__(self, dataset_dir, release, usage=None, extensions=None, num_samples=None, + num_parallel_workers=None, shuffle=None, sampler=None, num_shards=None, + shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.extensions = replace_none(extensions, ".sph") + self.release = release + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.TedliumNode(self.dataset_dir, self.release, self.usage, self.extensions, self.sampler) + + +class YesNoDataset(MappableDataset): + """ + A source dataset for reading and parsing the YesNo dataset. + + The generated dataset has three columns :py:obj:`[waveform, sample_rate, labels]`. + The tensor of column :py:obj:`waveform` is a vector of the float32 type. 
+ The tensor of column :py:obj:`sample_rate` is a scalar of the int32 type. + The tensor of column :py:obj:`labels` is a scalar of the int32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only + be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> yes_no_dataset_dir = "/path/to/yes_no_dataset_directory" + >>> + >>> # Read 3 samples from YesNo dataset + >>> dataset = ds.YesNoDataset(dataset_dir=yes_no_dataset_dir, num_samples=3) + >>> + >>> # Note: In YesNo dataset, each dictionary has keys "waveform", "sample_rate", "label" + + About YesNo dataset: + + Yesno is an audio dataset consisting of 60 recordings of one individual saying yes or no in Hebrew; each + recording is eight words long. It was created for the Kaldi audio project by an author who wishes to + remain anonymous. + + Here is the original YesNo dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── yes_no_dataset_dir + ├── 1_1_0_0_1_1_0_0.wav + ├── 1_0_0_0_1_1_0_0.wav + ├── 1_1_0_0_1_1_0_0.wav + └──.... + + Citation: + + .. 
code-block:: + + @NetworkResource{Kaldi_audio_project, + author = {anonymous}, + url = "http://wwww.openslr.org/1/" + } + """ + + @check_yes_no_dataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + + def parse(self, children=None): + return cde.YesNoNode(self.dataset_dir, self.sampler) diff --git a/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py b/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py new file mode 100644 index 00000000000..13d0f21e878 --- /dev/null +++ b/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py @@ -0,0 +1,247 @@ +# Copyright 2019-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +This dataset module supports various formats of datasets, including ImageNet, TFData, +MNIST, Cifar10/100, Manifest, MindRecord, and more. This module loads data with +high performance and parses data precisely. Some of the operations that are +provided to users to preprocess data include shuffle, batch, repeat, map, and zip. +""" +import numpy as np + +import mindspore._c_dataengine as cde + +from mindspore import log as logger +from .datasets import MappableDataset, SourceDataset, TextBaseDataset, Shuffle, Schema, \ + shuffle_to_shuffle_mode, shuffle_to_bool +from .validators import check_minddataset, check_tfrecorddataset + +from ..core.validator_helpers import replace_none +from . import samplers + + +class MindDataset(MappableDataset, TextBaseDataset): + """ + A source dataset for reading and parsing MindRecord dataset. + + The columns of generated dataset depend on the source MindRecord files. + + Args: + dataset_files (Union[str, list[str]]): If dataset_file is a str, it represents for + a file name of one component of a mindrecord source, other files with identical source + in the same path will be found and loaded automatically. If dataset_file is a list, + it represents for a list of dataset files to be read directly. + columns_list (list[str], optional): List of columns to be read (default=None). + num_parallel_workers (int, optional): The number of readers (default=None). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=None, performs global shuffle). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are three levels of shuffling: + + - Shuffle.GLOBAL: Global shuffle of all rows of data in dataset. + + - Shuffle.FILES: Shuffle the file sequence but keep the order of data within each file. 
+ + - Shuffle.INFILE: Keep the file sequence the same but shuffle the data within each file. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, 'num_samples' reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, sampler is exclusive + with shuffle and block_reader). Support list: SubsetRandomSampler, + PkSampler, RandomSampler, SequentialSampler, DistributedSampler. + padded_sample (dict, optional): Samples will be appended to dataset, where + keys are the same as column_list. + num_padded (int, optional): Number of padding samples. Dataset size + plus num_padded should be divisible by num_shards. + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, all samples). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_files are not valid or do not exist. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> mind_dataset_dir = ["/path/to/mind_dataset_file"] # contains 1 or multiple MindRecord files + >>> dataset = ds.MindDataset(dataset_files=mind_dataset_dir) + """ + + def parse(self, children=None): + return cde.MindDataNode(self.dataset_files, self.columns_list, self.sampler, self.new_padded_sample, + self.num_padded, shuffle_to_shuffle_mode(self.shuffle_option)) + + @check_minddataset + def __init__(self, dataset_files, columns_list=None, num_parallel_workers=None, shuffle=None, num_shards=None, + shard_id=None, sampler=None, padded_sample=None, num_padded=None, num_samples=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle_to_bool(shuffle), num_shards=num_shards, shard_id=shard_id, cache=cache) + if shuffle is not None and not isinstance(shuffle, (bool, Shuffle)): + raise TypeError("shuffle must be of boolean or enum of 'Shuffle' values like 'Shuffle.GLOBAL' or " + "'Shuffle.FILES' or 'Shuffle.INFILE'.") + if num_samples and shuffle in (Shuffle.FILES, Shuffle.INFILE): + raise ValueError("'Shuffle.FILES' or 'Shuffle.INFILE' and 'num_samples' " + "cannot be specified at the same time.") + self.shuffle_option = shuffle + if isinstance(dataset_files, list): + self.load_dataset = False + else: + self.load_dataset = True + self.dataset_files = 
dataset_files
+        self.columns_list = replace_none(columns_list, [])
+
+        if shuffle is False:
+            logger.warning("WARN: global shuffle is not used.")
+
+        if sampler is not None:
+            if isinstance(sampler, (
+                    samplers.SubsetRandomSampler, samplers.SubsetSampler, samplers.PKSampler,
+                    samplers.DistributedSampler,
+                    samplers.RandomSampler, samplers.SequentialSampler)) is False:
+                raise ValueError("The sampler is not supported yet.")
+
+        self.padded_sample = padded_sample
+        self.num_padded = replace_none(num_padded, 0)
+
+        self.new_padded_sample = {}
+        if padded_sample:
+            for k, v in padded_sample.items():
+                if isinstance(v, np.ndarray):
+                    self.new_padded_sample[k] = v.tobytes()
+                else:
+                    self.new_padded_sample[k] = v
+
+
+class TFRecordDataset(SourceDataset, TextBaseDataset):
+    """
+    A source dataset for reading and parsing datasets stored on disk in TFData format.
+
+    The columns of the generated dataset depend on the source TFRecord files.
+
+    Args:
+        dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
+            pattern of files. The list will be sorted in lexicographical order.
+        schema (Union[str, Schema], optional): Path to the JSON schema file or schema object (default=None).
+            If the schema is not provided, the metadata from the TFData file is considered the schema.
+        columns_list (list[str], optional): List of columns to be read (default=None, read all columns).
+        num_samples (int, optional): The number of samples (rows) to be included in the dataset (default=None).
+            If num_samples is None and numRows(parsed from schema) does not exist, read the full dataset;
+            If num_samples is None and numRows(parsed from schema) is greater than 0, read numRows rows;
+            If both num_samples and numRows(parsed from schema) are greater than 0, read num_samples rows.
+        num_parallel_workers (int, optional): Number of workers to read the data
+            (default=None, number set in the config).
+        shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+            (default=Shuffle.GLOBAL).
+            If shuffle is False, no shuffling will be performed;
+            If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL;
+            Otherwise, there are two levels of shuffling:
+
+            - Shuffle.GLOBAL: Shuffle both the files and samples.
+
+            - Shuffle.FILES: Shuffle files only.
+
+        num_shards (int, optional): Number of shards that the dataset will be divided
+            into (default=None). When this argument is specified, `num_samples` reflects
+            the maximum sample number per shard.
+        shard_id (int, optional): The shard ID within num_shards (default=None). This
+            argument can only be specified when num_shards is also specified.
+        shard_equal_rows (bool, optional): Get equal rows for all shards (default=False). If shard_equal_rows
+            is False, the number of rows in each shard may not be equal, which may lead to a failure in
+            distributed training. When the number of samples per TFRecord file is not equal, it is suggested
+            to set it to True. This argument should only be specified when num_shards is also specified.
+        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing
+            (default=None, which means no cache is used).
+
+    Raises:
+        RuntimeError: If dataset_files are not valid or do not exist.
+        RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+        RuntimeError: If num_shards is specified but shard_id is None.
+        RuntimeError: If shard_id is specified but num_shards is None.
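A sketch of the 2-way sharded read that `num_shards`, `shard_id` and `shard_equal_rows` describe above, with hypothetical file names:

.. code-block:: python

    import mindspore.dataset as ds

    files = ["/path/to/train-0001.tfrecord", "/path/to/train-0002.tfrecord"]  # hypothetical paths
    # shard_equal_rows=True makes each shard yield the same number of rows, which is
    # what the argument description above suggests when per-file sample counts differ.
    shard0 = ds.TFRecordDataset(dataset_files=files, num_shards=2, shard_id=0,
                                shard_equal_rows=True, shuffle=ds.Shuffle.FILES)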
+ ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Examples: + >>> from mindspore import dtype as mstype + >>> + >>> tfrecord_dataset_dir = ["/path/to/tfrecord_dataset_file"] # contains 1 or multiple TFRecord files + >>> tfrecord_schema_file = "/path/to/tfrecord_schema_file" + >>> + >>> # 1) Get all rows from tfrecord_dataset_dir with no explicit schema. + >>> # The meta-data in the first row will be used as a schema. + >>> dataset = ds.TFRecordDataset(dataset_files=tfrecord_dataset_dir) + >>> + >>> # 2) Get all rows from tfrecord_dataset_dir with user-defined schema. + >>> schema = ds.Schema() + >>> schema.add_column(name='col_1d', de_type=mstype.int64, shape=[2]) + >>> dataset = ds.TFRecordDataset(dataset_files=tfrecord_dataset_dir, schema=schema) + >>> + >>> # 3) Get all rows from tfrecord_dataset_dir with schema file. + >>> dataset = ds.TFRecordDataset(dataset_files=tfrecord_dataset_dir, schema=tfrecord_schema_file) + """ + + @check_tfrecorddataset + def __init__(self, dataset_files, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, + shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_files = self._find_files(dataset_files) + self.dataset_files.sort() + + self.schema = schema + self.columns_list = replace_none(columns_list, []) + self.shard_equal_rows = replace_none(shard_equal_rows, False) + + if self.schema is not None and (self.num_samples is None or self.num_samples == 0): + self.num_samples = Schema.get_num_rows(self.schema) + + def parse(self, children=None): + schema = self.schema.cpp_schema if isinstance(self.schema, Schema) else self.schema + return cde.TFRecordNode(self.dataset_files, schema, self.columns_list, self.num_samples, self.shuffle_flag, + self.num_shards, self.shard_id, self.shard_equal_rows) diff --git a/mindspore/python/mindspore/dataset/engine/datasets_text.py b/mindspore/python/mindspore/dataset/engine/datasets_text.py new file mode 100644 index 00000000000..447d1a04314 --- /dev/null +++ b/mindspore/python/mindspore/dataset/engine/datasets_text.py @@ -0,0 +1,1593 @@ +# Copyright 2019-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +This dataset module supports various formats of datasets, including ImageNet, TFData, +MNIST, Cifar10/100, Manifest, MindRecord, and more. This module loads data with +high performance and parses data precisely. Some of the operations that are +provided to users to preprocess data include shuffle, batch, repeat, map, and zip. 
+""" +import mindspore._c_dataengine as cde + +from .datasets import MappableDataset, SourceDataset, TextBaseDataset, Shuffle +from .validators import check_imdb_dataset, check_iwslt2016_dataset, check_iwslt2017_dataset, \ + check_penn_treebank_dataset, check_ag_news_dataset, check_amazon_review_dataset, check_udpos_dataset, \ + check_wiki_text_dataset, check_conll2000_dataset, check_cluedataset, check_csvdataset, \ + check_sogou_news_dataset, check_textfiledataset, check_dbpedia_dataset, check_yelp_review_dataset, \ + check_en_wik9_dataset, check_yahoo_answers_dataset + +from ..core.validator_helpers import replace_none + + +class AGNewsDataset(SourceDataset, TextBaseDataset): + """ + A source dataset that reads and parses AG News datasets. + + The generated dataset has three columns: :py:obj:`[index, title, description]`. + The tensor of column :py:obj:`index` is of the string type. + The tensor of column :py:obj:`title` is of the string type. + The tensor of column :py:obj:`description` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Acceptable usages include `train`, `test` and `all` (default=None, all samples). + num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, 'num_samples' reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Examples: + >>> ag_news_dataset_dir = "/path/to/ag_news_dataset_file" + >>> dataset = ds.AGNewsDataset(dataset_dir=ag_news_dataset_dir, usage='all') + + About AGNews dataset: + + AG is a collection of over 1 million news articles. The news articles were collected + by ComeToMyHead from over 2,000 news sources in over 1 year of activity. ComeToMyHead + is an academic news search engine that has been in operation since July 2004. + The dataset is provided by academics for research purposes such as data mining + (clustering, classification, etc.), information retrieval (ranking, searching, etc.), + xml, data compression, data streaming, and any other non-commercial activities. + AG's news topic classification dataset was constructed by selecting the four largest + classes from the original corpus. Each class contains 30,000 training samples and + 1,900 test samples. The total number of training samples in train.csv is 120,000 + and the number of test samples in test.csv is 7,600. + + You can unzip the dataset files into the following structure and read by MindSpore's API: + + .. code-block:: + + . 
+ └── ag_news_dataset_dir + ├── classes.txt + ├── train.csv + ├── test.csv + └── readme.txt + + Citation: + + .. code-block:: + + @misc{zhang2015characterlevel, + title={Character-level Convolutional Networks for Text Classification}, + author={Xiang Zhang and Junbo Zhao and Yann LeCun}, + year={2015}, + eprint={1509.01626}, + archivePrefix={arXiv}, + primaryClass={cs.LG} + } + """ + + @check_ag_news_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, + num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.AGNewsNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class AmazonReviewDataset(SourceDataset): + """ + A source dataset that reads and parses Amazon Review Polarity and Amazon Review Full datasets. + + The generated dataset has three columns: :py:obj:`[label, title, content]`. + The tensor of column :py:obj:`label` is of the string type. + The tensor of column :py:obj:`title` is of the string type. + The tensor of column :py:obj:`content` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset + or the Amazon Review Full dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` (default= `all`). + For Polarity dataset, `train` will read from 3,600,000 train samples, + `test` will read from 400,000 test samples, + `all` will read from all 4,000,000 samples. + For Full dataset, `train` will read from 3,000,000 train samples, + `test` will read from 650,000 test samples, + `all` will read from all 3,650,000 samples (default=None, all samples). + num_samples (int, optional): Number of samples (rows) to be read (default=None, reads the full dataset). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the mindspore.dataset.config). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. 
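The `num_shards` / `shard_id` pair and the `Shuffle` levels documented above behave the same way across these text sources. As a minimal, hedged sketch (the directory path is a placeholder; the iterator call comes from the general `mindspore.dataset` API rather than this file), reading one shard of the Amazon Review training split could look like:

.. code-block:: python

    import mindspore.dataset as ds

    # Placeholder path; the directory is expected to contain train.csv and test.csv.
    amazon_dir = "/path/to/amazon_review_dataset_dir"
    # Shard 0 of 4 reads roughly a quarter of the samples, shuffling at file level.
    dataset = ds.AmazonReviewDataset(dataset_dir=amazon_dir, usage="train",
                                     shuffle=ds.Shuffle.FILES,
                                     num_shards=4, shard_id=0)
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        label, title, content = row["label"], row["title"], row["content"]
        break  # inspect only the first row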
+ + Examples: + >>> amazon_review_dataset_dir = "/path/to/amazon_review_dataset_dir" + >>> dataset = ds.AmazonReviewDataset(dataset_dir=amazon_review_dataset_dir, usage='all') + + About AmazonReview Dataset: + + The Amazon reviews full dataset consists of reviews from Amazon. The data span a period of 18 years, including ~35 + million reviews up to March 2013. Reviews include product and user information, ratings, and a plaintext review. + The dataset is mainly used for text classification, given the content and title, predict the correct star rating. + + The Amazon reviews polarity dataset is constructed by taking review score 1 and 2 as negative, 4 and 5 as positive. + Samples of score 3 is ignored. In the dataset, class 1 is the negative and class 2 is the positive. + + The Amazon Reviews Polarity and Amazon Reviews Full datasets have the same directory structures. + You can unzip the dataset files into the following structure and read by MindSpore's API: + + .. code-block:: + + . + └── amazon_review_dir + ├── train.csv + ├── test.csv + └── readme.txt + + Citation: + + .. code-block:: + + @article{zhang2015character, + title={Character-level convolutional networks for text classification}, + author={Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + journal={Advances in neural information processing systems}, + volume={28}, + pages={649--657}, + year={2015} + } + """ + + @check_amazon_review_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, 'all') + + def parse(self, children=None): + return cde.AmazonReviewNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class CLUEDataset(SourceDataset, TextBaseDataset): + """ + A source dataset that reads and parses CLUE datasets. + Supported CLUE classification tasks: `AFQMC`, `TNEWS`, `IFLYTEK`, `CMNLI`, `WSC` and `CSL`. + + The generated dataset with different task setting has different output columns: + + - task = :py:obj:`AFQMC` + - usage = :py:obj:`train`, output columns: :py:obj:`[sentence1, dtype=string]`, \ + :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. + - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint8]`, \ + :py:obj:`[sentence1, dtype=string]`, :py:obj:`[sentence2, dtype=string]`. + - usage = :py:obj:`eval`, output columns: :py:obj:`[sentence1, dtype=string]`, \ + :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. + + - task = :py:obj:`TNEWS` + - usage = :py:obj:`train`, output columns: :py:obj:`[label, dtype=string]`, \ + :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, :py:obj:`[keywords, dtype=string]`. + - usage = :py:obj:`test`, output columns: :py:obj:`[label, dtype=string]`, \ + :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, :py:obj:`[keywords, dtype=string]`. + - usage = :py:obj:`eval`, output columns: :py:obj:`[label, dtype=string]`, \ + :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`, :py:obj:`[keywords, dtype=string]`. 
+ + - task = :py:obj:`IFLYTEK` + - usage = :py:obj:`train`, output columns: :py:obj:`[label, dtype=string]`, \ + :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`. + - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=string]`, \ + :py:obj:`[sentence, dtype=string]`. + - usage = :py:obj:`eval`, output columns: :py:obj:`[label, dtype=string]`, \ + :py:obj:`[label_des, dtype=string]`, :py:obj:`[sentence, dtype=string]`. + + - task = :py:obj:`CMNLI` + - usage = :py:obj:`train`, output columns: :py:obj:`[sentence1, dtype=string]`, \ + :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. + - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint8]`, \ + :py:obj:`[sentence1, dtype=string]`, :py:obj:`[sentence2, dtype=string]`. + - usage = :py:obj:`eval`, output columns: :py:obj:`[sentence1, dtype=string]`, \ + :py:obj:`[sentence2, dtype=string]`, :py:obj:`[label, dtype=string]`. + + - task = :py:obj:`WSC` + - usage = :py:obj:`train`, output columns: :py:obj:`[span1_index, dtype=uint8]`, \ + :py:obj:`[span2_index, dtype=uint8]`, :py:obj:`[span1_text, dtype=string]`, \ + :py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint8]`, \ + :py:obj:`[text, dtype=string]`, :py:obj:`[label, dtype=string]`. + - usage = :py:obj:`test`, output columns: :py:obj:`[span1_index, dtype=uint8]`, \ + :py:obj:`[span2_index, dtype=uint8]`, :py:obj:`[span1_text, dtype=string]`, \ + :py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint8]`, :py:obj:`[text, dtype=string]`. + - usage = :py:obj:`eval`, output columns: :py:obj:`[span1_index, dtype=uint8]`, \ + :py:obj:`[span2_index, dtype=uint8]`, :py:obj:`[span1_text, dtype=string]`, \ + :py:obj:`[span2_text, dtype=string]`, :py:obj:`[idx, dtype=uint8]`, \ + :py:obj:`[text, dtype=string]`, :py:obj:`[label, dtype=string]`. + + - task = :py:obj:`CSL` + - usage = :py:obj:`train`, output columns: :py:obj:`[id, dtype=uint8]`, \ + :py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`, :py:obj:`[label, dtype=string]`. + - usage = :py:obj:`test`, output columns: :py:obj:`[id, dtype=uint8]`, \ + :py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`. + - usage = :py:obj:`eval`, output columns: :py:obj:`[id, dtype=uint8]`, \ + :py:obj:`[abst, dtype=string]`, :py:obj:`[keyword, dtype=string]`, :py:obj:`[label, dtype=string]`. + + Args: + dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for + a pattern of files. The list will be sorted in a lexicographical order. + task (str, optional): The kind of task, one of `AFQMC`, `TNEWS`, `IFLYTEK`, `CMNLI`, `WSC` and `CSL`. + (default=AFQMC). + usage (str, optional): Specify the `train`, `test` or `eval` part of dataset (default="train"). + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, will include all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). 
+ When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards (default=None). This
+ argument can only be specified when num_shards is also specified.
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+ (default=None, which means no cache is used).
+
+ Raises:
+ RuntimeError: If dataset_files are not valid or do not exist.
+ RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+ RuntimeError: If num_shards is specified but shard_id is None.
+ RuntimeError: If shard_id is specified but num_shards is None.
+
+ Examples:
+ >>> clue_dataset_dir = ["/path/to/clue_dataset_file"] # contains 1 or multiple clue files
+ >>> dataset = ds.CLUEDataset(dataset_files=clue_dataset_dir, task='AFQMC', usage='train')
+
+ About CLUE dataset:
+
+ CLUE is a Chinese Language Understanding Evaluation benchmark. It contains multiple
+ tasks, including single-sentence classification, sentence pair classification, and machine
+ reading comprehension.
+
+ You can unzip the dataset files into the following structure and read by MindSpore's API,
+ such as the afqmc dataset:
+
+ .. code-block::
+
+ .
+ └── afqmc_public
+ ├── train.json
+ ├── test.json
+ └── dev.json
+
+ Citation:
+
+ .. code-block::
+
+ @article{CLUEbenchmark,
+ title = {CLUE: A Chinese Language Understanding Evaluation Benchmark},
+ author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li,
+ Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng,
+ Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou,
+ Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan},
+ journal = {arXiv preprint arXiv:2004.05986},
+ year = {2020},
+ howpublished = {https://github.com/CLUEbenchmark/CLUE}
+ }
+ """
+
+ @check_cluedataset
+ def __init__(self, dataset_files, task='AFQMC', usage='train', num_samples=None, num_parallel_workers=None,
+ shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_files = self._find_files(dataset_files)
+ self.usage = replace_none(usage, 'train')
+ self.task = replace_none(task, 'AFQMC')
+
+ def parse(self, children=None):
+ return cde.CLUENode(self.dataset_files, self.task, self.usage, self.num_samples, self.shuffle_flag,
+ self.num_shards, self.shard_id)
+
+
+class CoNLL2000Dataset(SourceDataset):
+ """
+ A source dataset that reads and parses CoNLL2000 dataset.
+
+ The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]`.
+ The tensor of column :py:obj:`word` is of the string type.
+ The tensor of column :py:obj:`pos_tag` is of the string type.
+ The tensor of column :py:obj:`chunk_tag` is of the string type.
+
+ Args:
+ dataset_dir (str): Path to the root directory that contains the dataset.
+ usage (str, optional): Usage of this dataset, can be `train`, `test`, or `all`. `train` will read from
+ 8,936 train samples, `test` will read from 2,012 test samples,
+ `all` will read from all 10,948 samples (default=None, all samples).
+ num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
+ shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+ (default=Shuffle.GLOBAL).
+ If shuffle is False, no shuffling will be performed;
+ If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+ Otherwise, there are two levels of shuffling:
+
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
+
+ - Shuffle.FILES: Shuffle files only.
+
+ num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards (default=None). This
+ argument can only be specified when num_shards is also specified.
+ num_parallel_workers (int, optional): Number of workers to read the data
+ (default=None, number set in the config).
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing
+ (default=None, which means no cache is used).
+
+ Raises:
+ RuntimeError: If dataset_dir does not contain data files.
+ RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+ RuntimeError: If num_shards is specified but shard_id is None.
+ RuntimeError: If shard_id is specified but num_shards is None.
+
+ Examples:
+ >>> conll2000_dataset_dir = "/path/to/conll2000_dataset_dir"
+ >>> dataset = ds.CoNLL2000Dataset(dataset_dir=conll2000_dataset_dir, usage='all')
+ """
+
+ @check_conll2000_dataset
+ def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
+ shard_id=None, num_parallel_workers=None, cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_dir = dataset_dir
+ self.usage = replace_none(usage, 'all')
+
+ def parse(self, children=None):
+ return cde.CoNLL2000Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards,
+ self.shard_id)
+
+
+class CSVDataset(SourceDataset, TextBaseDataset):
+ """
+ A source dataset that reads and parses comma-separated values (CSV) datasets.
+ The columns of generated dataset depend on the source CSV files.
+
+ Args:
+ dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search
+ for a pattern of files. The list will be sorted in a lexicographical order.
+ field_delim (str, optional): A string that indicates the char delimiter to separate fields (default=',').
+ column_defaults (list, optional): List of default values for the CSV field (default=None). Each item
+ in the list must be of a valid type (float, int, or string). If this is not provided, treats all
+ columns as string type.
+ column_names (list[str], optional): List of column names of the dataset (default=None). If this
+ is not provided, infers the column_names from the first row of CSV file.
+ num_samples (int, optional): The number of samples to be included in the dataset
+ (default=None, will include all samples).
+ num_parallel_workers (int, optional): Number of workers to read the data
+ (default=None, number set in the config).
+ shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+ (default=Shuffle.GLOBAL).
+ If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_files are not valid or do not exist. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + + Examples: + >>> csv_dataset_dir = ["/path/to/csv_dataset_file"] # contains 1 or multiple csv files + >>> dataset = ds.CSVDataset(dataset_files=csv_dataset_dir, column_names=['col1', 'col2', 'col3', 'col4']) + """ + + @check_csvdataset + def __init__(self, dataset_files, field_delim=',', column_defaults=None, column_names=None, num_samples=None, + num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_files = self._find_files(dataset_files) + self.dataset_files.sort() + self.field_delim = replace_none(field_delim, ',') + self.column_defaults = replace_none(column_defaults, []) + self.column_names = replace_none(column_names, []) + + def parse(self, children=None): + return cde.CSVNode(self.dataset_files, self.field_delim, self.column_defaults, self.column_names, + self.num_samples, self.shuffle_flag, self.num_shards, self.shard_id) + + +class DBpediaDataset(SourceDataset, TextBaseDataset): + """ + A source dataset that reads and parses the DBpedia dataset. + + The generated dataset has three columns :py:obj:`[class, title, content]`. + The tensor of column :py:obj:`class` is of the string type. + The tensor of column :py:obj:`title` is of the string type. + The tensor of column :py:obj:`content` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. + `train` will read from 560,000 train samples, + `test` will read from 70,000 test samples, + `all` will read from all 630,000 samples (default=None, all samples). + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, will include all text). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL; + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. 
+ + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Examples: + >>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory" + >>> + >>> # 1) Read 3 samples from DBpedia dataset + >>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dataset_dir, num_samples=3) + >>> + >>> # 2) Read train samples from DBpedia dataset + >>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dataset_dir, usage="train") + + About DBpedia dataset: + + The DBpedia dataset consists of 630,000 text samples in 14 classes, there are 560,000 samples in the train.csv + and 70,000 samples in the test.csv. + The 14 different classes represent Company, EducationaInstitution, Artist, Athlete, OfficeHolder, + MeanOfTransportation, Building, NaturalPlace, Village, Animal, Plant, Album, Film, WrittenWork. + + Here is the original DBpedia dataset structure. + You can unzip the dataset files into this directory structure and read by Mindspore's API. + + .. code-block:: + + . + └── dbpedia_dataset_dir + ├── train.csv + ├── test.csv + ├── classes.txt + └── readme.txt + + .. code-block:: + + @article{DBpedia, + title = {DBPedia Ontology Classification Dataset}, + author = {Jens Lehmann, Robert Isele, Max Jakob, Anja Jentzsch, Dimitris Kontokostas, + Pablo N. Mendes, Sebastian Hellmann, Mohamed Morsey, Patrick van Kleef, + Sören Auer, Christian Bizer}, + year = {2015}, + howpublished = {http://dbpedia.org} + } + """ + + @check_dbpedia_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.DBpediaNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class EnWik9Dataset(SourceDataset): + """ + A source dataset that reads and parses EnWik9 dataset. + + The generated dataset has one column :py:obj:`[text]` with type string. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, will include all samples). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=True). 
+ If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Examples: + >>> en_wik9_dataset_dir = "/path/to/en_wik9_dataset" + >>> dataset2 = ds.EnWik9Dataset(dataset_dir=en_wik9_dataset_dir, num_samples=2, + ... shuffle=True) + + About EnWik9 dataset: + + The data of EnWik9 is UTF-8 encoded XML consisting primarily of English text. It contains 243,426 article titles, + of which 85,560 are #REDIRECT to fix broken links, and the rest are regular articles. + + The data is UTF-8 clean. All characters are in the range U'0000 to U'10FFFF with valid encodings of 1 to + 4 bytes. The byte values 0xC0, 0xC1, and 0xF5-0xFF never occur. Also, in the Wikipedia dumps, + there are no control characters in the range 0x00-0x1F except for 0x09 (tab) and 0x0A (linefeed). + Linebreaks occur only on paragraph boundaries, so they always have a semantic purpose. + + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── EnWik9 + ├── enwik9 + + Citation: + + .. code-block:: + + @NetworkResource{Hutter_prize, + author = {English Wikipedia}, + url = "https://cs.fit.edu/~mmahoney/compression/textdata.html", + month = {March}, + year = {2006} + } + """ + + @check_en_wik9_dataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=True, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + + def parse(self, children=None): + return cde.EnWik9Node(self.dataset_dir, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + +class IMDBDataset(MappableDataset): + """ + A source dataset for reading and parsing Internet Movie Database (IMDb). + + The generated dataset has two columns: :py:obj:`[text, label]`. + The tensor of column :py:obj:`text` is of the string type. + The tensor of column :py:obj:`label` is of a scalar of uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` + (default=None, will read all samples). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all samples). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). 
+ num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - The shape of the test column. + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> imdb_dataset_dir = "/path/to/imdb_dataset_directory" + >>> + >>> # 1) Read all samples (text files) in imdb_dataset_dir with 8 threads + >>> dataset = ds.IMDBDataset(dataset_dir=imdb_dataset_dir, num_parallel_workers=8) + >>> + >>> # 2) Read train samples (text files). + >>> dataset = ds.IMDBDataset(dataset_dir=imdb_dataset_dir, usage="train") + + About IMDBDataset: + + The IMDB dataset contains 50, 000 highly polarized reviews from the Internet Movie Database (IMDB). The data set + was divided into 25 000 comments for training and 25 000 comments for testing, with both the training set and test + set containing 50% positive and 50% negative comments. Train labels and test labels are all lists of 0 and 1, where + 0 stands for negative and 1 for positive. + + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── imdb_dataset_directory + ├── train + │ ├── pos + │ │ ├── 0_9.txt + │ │ ├── 1_7.txt + │ │ ├── ... + │ ├── neg + │ │ ├── 0_3.txt + │ │ ├── 1_1.txt + │ │ ├── ... + ├── test + │ ├── pos + │ │ ├── 0_10.txt + │ │ ├── 1_10.txt + │ │ ├── ... + │ ├── neg + │ │ ├── 0_2.txt + │ │ ├── 1_3.txt + │ │ ├── ... + + Citation: + + .. code-block:: + + @InProceedings{maas-EtAl:2011:ACL-HLT2011, + author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan + and Ng, Andrew Y. 
and Potts, Christopher}, + title = {Learning Word Vectors for Sentiment Analysis}, + booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: + Human Language Technologies}, + month = {June}, + year = {2011}, + address = {Portland, Oregon, USA}, + publisher = {Association for Computational Linguistics}, + pages = {142--150}, + url = {http://www.aclweb.org/anthology/P11-1015} + } + """ + + @check_imdb_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.IMDBNode(self.dataset_dir, self.usage, self.sampler) + + +class IWSLT2016Dataset(SourceDataset, TextBaseDataset): + """ + A source dataset that reads and parses IWSLT2016 datasets. + + The generated dataset has two columns: :py:obj:`[text, translation]`. + The tensor of column :py:obj: `text` is of the string type. + The tensor of column :py:obj: `translation` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Acceptable usages include "train", "valid", "test" and "all" (default=None, all samples). + language_pair (sequence, optional): Sequence containing source and target language, supported values are + (`en`, `fr`), ("en", "de"), ("en", "cs"), ("en", "ar"), ("fr", "en"), ("de", "en"), ("cs", "en"), + ("ar", "en") (default=("de", "en")). + valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set + of valid_set type will be read, supported values are "dev2010", "tst2010", "tst2011", "tst2012", "tst2013" + and "tst2014" (default="tst2013"). + test_set (str, optional): A string to identify test set, when usage is test or all, the test set of test_set + type will be read, supported values are "dev2010", "tst2010", "tst2011", "tst2012", "tst2013" and "tst2014" + (default="tst2014"). + num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. 
+ RuntimeError: If num_shards is specified but shard_id is None.
+ RuntimeError: If shard_id is specified but num_shards is None.
+
+ Examples:
+ >>> iwslt2016_dataset_dir = "/path/to/iwslt2016_dataset_dir"
+ >>> dataset = ds.IWSLT2016Dataset(dataset_dir=iwslt2016_dataset_dir, usage='all',
+ ... language_pair=('de', 'en'), valid_set='tst2013', test_set='tst2014')
+
+ About IWSLT2016 dataset:
+
+ IWSLT is an international spoken language translation conference, a major annual scientific conference dedicated
+ to all aspects of spoken language translation. The MT task of the IWSLT evaluation campaign provides a dataset,
+ which is publicly available through the WIT3 website wit3.fbk.eu. The IWSLT2016 dataset includes translations from
+ English to Arabic, Czech, French, and German, and translations from Arabic, Czech, French, and German to English.
+
+ You can unzip the original IWSLT2016 dataset files into this directory structure and read by MindSpore's API. After
+ decompression, you also need to decompress the subset to be read in the corresponding folder. For example, if you
+ want to read the de-en dataset, you need to unzip the tgz file in the de/en directory; the dataset is in the
+ unzipped folder.
+
+ .. code-block::
+
+ .
+ └── iwslt2016_dataset_directory
+ ├── subeval_files
+ └── texts
+ ├── ar
+ │ └── en
+ │ └── ar-en
+ ├── cs
+ │ └── en
+ │ └── cs-en
+ ├── de
+ │ └── en
+ │ └── de-en
+ │ ├── IWSLT16.TED.dev2010.de-en.de.xml
+ │ ├── train.tags.de-en.de
+ │ ├── ...
+ ├── en
+ │ ├── ar
+ │ │ └── en-ar
+ │ ├── cs
+ │ │ └── en-cs
+ │ ├── de
+ │ │ └── en-de
+ │ └── fr
+ │ └── en-fr
+ └── fr
+ └── en
+ └── fr-en
+
+ Citation:
+
+ .. code-block::
+
+ @inproceedings{cettoloEtAl:EAMT2012,
+ Address = {Trento, Italy},
+ Author = {Mauro Cettolo and Christian Girardi and Marcello Federico},
+ Booktitle = {Proceedings of the 16$^{th}$ Conference of the European Association for Machine Translation
+ (EAMT)},
+ Date = {28-30},
+ Month = {May},
+ Pages = {261--268},
+ Title = {WIT$^3$: Web Inventory of Transcribed and Translated Talks},
+ Year = {2012}}
+ """
+
+ @check_iwslt2016_dataset
+ def __init__(self, dataset_dir, usage=None, language_pair=None, valid_set=None, test_set=None,
+ num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, num_parallel_workers=None,
+ cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_dir = dataset_dir
+ self.usage = replace_none(usage, 'all')
+ self.language_pair = replace_none(language_pair, ["de", "en"])
+ self.valid_set = replace_none(valid_set, 'tst2013')
+ self.test_set = replace_none(test_set, 'tst2014')
+
+ def parse(self, children=None):
+ return cde.IWSLT2016Node(self.dataset_dir, self.usage, self.language_pair, self.valid_set, self.test_set,
+ self.num_samples, self.shuffle_flag, self.num_shards, self.shard_id)
+
+
+class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
+ """
+ A source dataset that reads and parses IWSLT2017 datasets.
+
+ The generated dataset has two columns: :py:obj:`[text, translation]`.
+ The tensor of column :py:obj:`text` is of the string type.
+ The tensor of column :py:obj:`translation` is of the string type.
+
+ Args:
+ dataset_dir (str): Path to the root directory that contains the dataset.
+ usage (str, optional): Acceptable usages include "train", "valid", "test" and "all" (default=None, all samples).
+ language_pair (list, optional): List containing src and tgt language, supported values are ("en", "nl"),
+ ("en", "de"), ("en", "it"), ("en", "ro"), ("nl", "en"), ("nl", "de"), ("nl", "it"), ("nl", "ro"),
+ ("de", "en"), ("de", "nl"), ("de", "it"), ("de", "ro"), ("it", "en"), ("it", "nl"), ("it", "de"),
+ ("it", "ro"), ("ro", "en"), ("ro", "nl"), ("ro", "de"), ("ro", "it") (default=("de", "en")).
+ num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
+ shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+ (default=Shuffle.GLOBAL).
+ If shuffle is False, no shuffling will be performed;
+ If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+ Otherwise, there are two levels of shuffling:
+
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
+
+ - Shuffle.FILES: Shuffle files only.
+ num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards (default=None). This
+ argument can only be specified when num_shards is also specified.
+ num_parallel_workers (int, optional): Number of workers to read the data
+ (default=None, number set in the config).
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+ (default=None, which means no cache is used).
+
+ Raises:
+ RuntimeError: If dataset_dir does not contain data files.
+ RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+ RuntimeError: If num_shards is specified but shard_id is None.
+ RuntimeError: If shard_id is specified but num_shards is None.
+
+ Examples:
+ >>> iwslt2017_dataset_dir = "/path/to/iwslt2017_dataset_dir"
+ >>> dataset = ds.IWSLT2017Dataset(dataset_dir=iwslt2017_dataset_dir, usage='all', language_pair=('de', 'en'))
+
+ About IWSLT2017 dataset:
+
+ IWSLT is an international spoken language translation conference, a major annual scientific conference dedicated
+ to all aspects of spoken language translation. The MT task of the IWSLT evaluation campaign provides a dataset,
+ which is publicly available through the WIT3 website wit3.fbk.eu. The IWSLT2017 dataset involves German, English,
+ Italian, Dutch, and Romanian, and includes translations between any two of these languages.
+
+ You can unzip the original IWSLT2017 dataset files into this directory structure and read by MindSpore's API. You
+ need to decompress the dataset package in texts/DeEnItNlRo/DeEnItNlRo directory to get the DeEnItNlRo-DeEnItNlRo
+ subdirectory.
+
+ .. code-block::
+
+ .
+ └── iwslt2017_dataset_directory
+ └── DeEnItNlRo
+ └── DeEnItNlRo
+ └── DeEnItNlRo-DeEnItNlRo
+ ├── IWSLT17.TED.dev2010.de-en.de.xml
+ ├── train.tags.de-en.de
+ ├── ...
+
+ Citation:
+
+ ..
code-block:: + + @inproceedings{cettoloEtAl:EAMT2012, + Address = {Trento, Italy}, + Author = {Mauro Cettolo and Christian Girardi and Marcello Federico}, + Booktitle = {Proceedings of the 16$^{th}$ Conference of the European Association for Machine Translation + (EAMT)}, + Date = {28-30}, + Month = {May}, + Pages = {261--268}, + Title = {WIT$^3$: Web Inventory of Transcribed and Translated Talks}, + Year = {2012}} + """ + + @check_iwslt2017_dataset + def __init__(self, dataset_dir, usage=None, language_pair=None, num_samples=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, num_parallel_workers=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, 'all') + self.language_pair = replace_none(language_pair, ["de", "en"]) + + def parse(self, children=None): + return cde.IWSLT2017Node(self.dataset_dir, self.usage, self.language_pair, self.num_samples, + self.shuffle_flag, self.num_shards, self.shard_id) + + +class PennTreebankDataset(SourceDataset, TextBaseDataset): + """ + A source dataset that reads and parses PennTreebank datasets. + + The generated dataset has one column :py:obj:`[text]`. + The tensor of column :py:obj:`text` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Acceptable usages include `train`, `test`, 'valid' and `all`. + 'train' will read from 42,068 train samples of string type, + 'test' will read from 3,370 test samples of string type, + 'valid' will read from 3,761 test samples of string type, + 'all' will read from all 49,199 samples of string type (default=None, all samples). + num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, 'num_samples' reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Examples: + >>> penn_treebank_dataset_dir = "/path/to/penn_treebank_dataset_directory" + >>> dataset = ds.PennTreebankDataset(dataset_dir=penn_treebank_dataset_dir, usage='all') + + About PennTreebank dataset: + + Penn Treebank (PTB) dataset, is widely used in machine learning for NLP (Natural Language Processing) + research. Word-level PTB does not contain capital letters, numbers, and punctuations, and the vocabulary + is capped at 10k unique words, which is relatively small in comparison to most modern datasets which + can result in a larger number of out of vocabulary tokens. 
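Because the word-level PTB vocabulary is capped as described above, a common follow-up step is to build a vocabulary from the training split and map out-of-vocabulary words to a special token. A rough sketch using the tokenizer and vocabulary utilities from `mindspore.dataset.text` (the path is a placeholder; these operations come from the text package, not from this file):

.. code-block:: python

    import mindspore.dataset as ds
    import mindspore.dataset.text as text

    ptb_dir = "/path/to/penn_treebank_dataset_directory"  # placeholder path
    train = ds.PennTreebankDataset(dataset_dir=ptb_dir, usage="train", shuffle=False)
    # Split each line of the single "text" column into word tokens.
    train = train.map(operations=text.WhitespaceTokenizer(), input_columns=["text"])
    # Build a vocabulary from the tokenized column; unseen words map to "<unk>".
    vocab = text.Vocab.from_dataset(train, columns=["text"],
                                    special_tokens=["<pad>", "<unk>"], special_first=True)
    train = train.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])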
+ + Here is the original PennTreebank dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + . + └── PennTreebank_dataset_dir + ├── ptb.test.txt + ├── ptb.train.txt + └── ptb.valid.txt + + Citation: + + .. code-block:: + + @techreport{Santorini1990, + added-at = {2014-03-26T23:25:56.000+0100}, + author = {Santorini, Beatrice}, + biburl = {https://www.bibsonomy.org/bibtex/234cdf6ddadd89376090e7dada2fc18ec/butonic}, + file = {:Santorini - Penn Treebank tag definitions.pdf:PDF}, + institution = {Department of Computer and Information Science, University of Pennsylvania}, + interhash = {818e72efd9e4b5fae3e51e88848100a0}, + intrahash = {34cdf6ddadd89376090e7dada2fc18ec}, + keywords = {dis pos tagging treebank}, + number = {MS-CIS-90-47}, + timestamp = {2014-03-26T23:25:56.000+0100}, + title = {Part-of-speech tagging guidelines for the {P}enn {T}reebank {P}roject}, + url = {ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz}, + year = 1990 + } + """ + + @check_penn_treebank_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.PennTreebankNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class SogouNewsDataset(SourceDataset): + """ + A source dataset that reads and parses Sogou News dataset. + + The generated dataset has three columns: :py:obj:`[index, title, content]`. + The tensor of column :py:obj:`index` is of the string type. + The tensor of column :py:obj:`title` is of the string type. + The tensor of column :py:obj:`content` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . + `train` will read from 450,000 train samples, `test` will read from 60,000 test samples, + `all` will read from all 510,000 samples (default=None, all samples). + num_samples (int, optional): Number of samples (rows) to read (default=None, read all samples). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. 
+ RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+ RuntimeError: If num_shards is specified but shard_id is None.
+ RuntimeError: If shard_id is specified but num_shards is None.
+
+ Examples:
+ >>> sogou_news_dataset_dir = "/path/to/sogou_news_dataset_dir"
+ >>> dataset = ds.SogouNewsDataset(dataset_dir=sogou_news_dataset_dir, usage='all')
+
+ About SogouNews Dataset:
+
+ SogouNews dataset includes 3 columns, corresponding to class index (1 to 5), title and content. The title and
+ content are escaped using double quotes ("), and any internal double quote is escaped by 2 double quotes ("").
+ New lines are escaped by a backslash followed by an "n" character, that is "\n".
+
+ You can unzip the dataset files into the following structure and read by MindSpore's API:
+
+ .. code-block::
+
+ .
+ └── sogou_news_dir
+ ├── classes.txt
+ ├── readme.txt
+ ├── test.csv
+ └── train.csv
+
+ Citation:
+
+ .. code-block::
+
+ @misc{zhang2015characterlevel,
+ title={Character-level Convolutional Networks for Text Classification},
+ author={Xiang Zhang and Junbo Zhao and Yann LeCun},
+ year={2015},
+ eprint={1509.01626},
+ archivePrefix={arXiv},
+ primaryClass={cs.LG}
+ }
+ """
+
+ @check_sogou_news_dataset
+ def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
+ shard_id=None, num_parallel_workers=None, cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_dir = dataset_dir
+ self.usage = replace_none(usage, 'all')
+
+ def parse(self, children=None):
+ return cde.SogouNewsNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
+ self.num_shards, self.shard_id)
+
+
+class TextFileDataset(SourceDataset, TextBaseDataset):
+ """
+ A source dataset that reads and parses datasets stored on disk in text format.
+ The generated dataset has one column :py:obj:`[text]` with type string.
+
+ Args:
+ dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
+ pattern of files. The list will be sorted in a lexicographical order.
+ num_samples (int, optional): The number of samples to be included in the dataset
+ (default=None, will include all samples).
+ num_parallel_workers (int, optional): Number of workers to read the data
+ (default=None, number set in the config).
+ shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+ (default=Shuffle.GLOBAL).
+ If shuffle is False, no shuffling will be performed;
+ If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+ Otherwise, there are two levels of shuffling:
+
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
+
+ - Shuffle.FILES: Shuffle files only.
+
+ num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards (default=None). This
+ argument can only be specified when num_shards is also specified.
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+ (default=None, which means no cache is used).
+
+ Raises:
+ RuntimeError: If dataset_files are not valid or do not exist.
+ RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+ RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + + Examples: + >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files + >>> dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir) + """ + + @check_textfiledataset + def __init__(self, dataset_files, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_files = self._find_files(dataset_files) + self.dataset_files.sort() + + def parse(self, children=None): + return cde.TextFileNode(self.dataset_files, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class UDPOSDataset(SourceDataset): + """ + A source dataset that reads and parses UDPOS dataset. + + The generated dataset has three columns: :py:obj:`[word, universal, stanford]`. + The tensor of column :py:obj:`word` is of the string type. + The tensor of column :py:obj:`universal` is of the string type. + The tensor of column :py:obj:`stanford` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. `train` will read from + 12,543 train samples, `test` will read from 2,077 test samples, `valid` will read from 2,002 test samples, + `all` will read from all 16,622 samples (default=None, all samples). + num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. 
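For the file-based sources such as TextFileDataset above, `dataset_files` may also be a glob pattern instead of an explicit list. A small sketch of counting and iterating lines (the glob pattern is a placeholder; `get_dataset_size` and `create_tuple_iterator` belong to the general dataset API):

.. code-block:: python

    import mindspore.dataset as ds

    # Placeholder glob; every matching file is read line by line, one row per line.
    files = "/path/to/corpus/*.txt"
    dataset = ds.TextFileDataset(dataset_files=files,
                                 shuffle=ds.Shuffle.FILES, num_samples=1000)
    print("rows:", dataset.get_dataset_size())
    for (line,) in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
        pass  # `line` holds one string from the single "text" column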
+
+ Examples:
+ >>> udpos_dataset_dir = "/path/to/udpos_dataset_dir"
+ >>> dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage='all')
+ """
+
+ @check_udpos_dataset
+ def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
+ shard_id=None, num_parallel_workers=None, cache=None):
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
+ self.dataset_dir = dataset_dir
+ self.usage = replace_none(usage, 'all')
+
+ def parse(self, children=None):
+ return cde.UDPOSNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards,
+ self.shard_id)
+
+
+class WikiTextDataset(SourceDataset):
+ """
+ A source dataset that reads and parses WikiText2 and WikiText103 datasets.
+
+ The generated dataset has one column :py:obj:`[text]`.
+ The tensor of column :py:obj:`text` is of the string type.
+
+ Args:
+ dataset_dir (str): Path to the root directory that contains the dataset.
+ usage (str, optional): Acceptable usages include `train`, `test`, `valid` and `all` (default=None, all samples).
+ num_samples (int, optional): Number of samples (rows) to read (default=None, reads the full dataset).
+ num_parallel_workers (int, optional): Number of workers to read the data
+ (default=None, number set in the config).
+ shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+ (default=Shuffle.GLOBAL).
+ If shuffle is False, no shuffling will be performed;
+ If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+ Otherwise, there are two levels of shuffling:
+
+ - Shuffle.GLOBAL: Shuffle both the files and samples.
+
+ - Shuffle.FILES: Shuffle files only.
+
+ num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+ When this argument is specified, 'num_samples' reflects the max sample number of per shard.
+ shard_id (int, optional): The shard ID within num_shards (default=None). This
+ argument can only be specified when num_shards is also specified.
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+ (default=None, which means no cache is used).
+
+ Examples:
+ >>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"
+ >>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all')
+
+ About WikiTextDataset dataset:
+
+ The WikiText Long Term Dependency Language Modeling Dataset is an English corpus of over 100 million words
+ drawn from Wikipedia's verified Good and Featured articles, released in two versions, WikiText2 and
+ WikiText103. WikiText2 has 36,718 lines in wiki.train.tokens, 4,358 lines in wiki.test.tokens and
+ 3,760 lines in wiki.valid.tokens. WikiText103 has 1,801,350 lines in wiki.train.tokens, 4,358 lines in
+ wiki.test.tokens and 3,760 lines in wiki.valid.tokens.
+
+ Here is the original WikiText dataset structure.
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
+
+ .. code-block::
+
+ .
+ └── WikiText2/WikiText103
+ ├── wiki.train.tokens
+ ├── wiki.test.tokens
+ ├── wiki.valid.tokens
+
+ Citation:
+
+ ..
code-block:: + + @article{merity2016pointer, + title={Pointer sentinel mixture models}, + author={Merity, Stephen and Xiong, Caiming and Bradbury, James and Socher, Richard}, + journal={arXiv preprint arXiv:1609.07843}, + year={2016} + } + """ + + @check_wiki_text_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.WikiTextNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class YahooAnswersDataset(SourceDataset): + """ + A source dataset that reads and parses the YahooAnswers dataset. + + The generated dataset has three columns :py:obj:`[class, title, content, answer]`. + The tensor of column :py:obj:`class` is of the string type. + The tensor of column :py:obj:`title` is of the string type. + The tensor of column :py:obj:`content` is of the string type. + The tensor of column :py:obj:`answer` is of the string type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read + from 1,400,000 train samples, `test` will read from 60,000 test samples, `all` will read from + all 1,460,000 samples (default=None, all samples). + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, will include all text). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). 
+
+    Examples:
+        >>> yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"
+        >>>
+        >>> # 1) Read 3 samples from YahooAnswers dataset
+        >>> dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, num_samples=3)
+        >>>
+        >>> # 2) Read train samples from YahooAnswers dataset
+        >>> dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, usage="train")
+
+    About YahooAnswers dataset:
+
+    The YahooAnswers dataset consists of 1,460,000 text samples in 10 classes.
+    There are 1,400,000 samples in train.csv and 60,000 samples in test.csv.
+    The 10 different classes represent Society & Culture, Science & Mathematics, Health, Education & Reference,
+    Computers & Internet, Sports, Business & Finance, Entertainment & Music, Family & Relationships,
+    Politics & Government.
+
+    Here is the original YahooAnswers dataset structure.
+    You can unzip the dataset files into this directory structure and read them with MindSpore's API.
+
+    .. code-block::
+
+        .
+        └── yahoo_answers_dataset_dir
+            ├── train.csv
+            ├── test.csv
+            ├── classes.txt
+            └── readme.txt
+
+    Citation:
+
+    .. code-block::
+
+        @article{YahooAnswers,
+          title = {Yahoo! Answers Topic Classification Dataset},
+          author = {Xiang Zhang},
+          year = {2015},
+          howpublished = {}
+        }
+    """
+
+    @check_yahoo_answers_dataset
+    def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
+                 num_shards=None, shard_id=None, cache=None):
+        super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
+                         num_shards=num_shards, shard_id=shard_id, cache=cache)
+        self.dataset_dir = dataset_dir
+        self.usage = replace_none(usage, "all")
+
+    def parse(self, children=None):
+        return cde.YahooAnswersNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
+                                    self.num_shards, self.shard_id)
+
+
+class YelpReviewDataset(SourceDataset, TextBaseDataset):
+    """
+    A source dataset that reads and parses Yelp Review Polarity and Yelp Review Full datasets.
+
+    The generated dataset has two columns: :py:obj:`[label, text]`.
+    The tensor of column :py:obj:`label` is of the string type.
+    The tensor of column :py:obj:`text` is of the string type.
+
+    Args:
+        dataset_dir (str): Path to the root directory that contains the dataset.
+        usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`.
+            For Polarity, `train` will read from 560,000 train samples, `test` will read from 38,000 test samples,
+            `all` will read from all 598,000 samples.
+            For Full, `train` will read from 650,000 train samples, `test` will read from 50,000 test samples,
+            `all` will read from all 700,000 samples (default=None, all samples).
+        num_samples (int, optional): Number of samples (rows) to read (default=None, reads all samples).
+        shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch
+            (default=Shuffle.GLOBAL).
+            If shuffle is False, no shuffling will be performed;
+            If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
+            Otherwise, there are two levels of shuffling:
+
+            - Shuffle.GLOBAL: Shuffle both the files and samples.
+
+            - Shuffle.FILES: Shuffle files only.
+
+        num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+            When this argument is specified, `num_samples` reflects the maximum sample number per shard.
+        shard_id (int, optional): The shard ID within num_shards (default=None).
This + argument can only be specified when num_shards is also specified. + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + + Examples: + >>> yelp_review_dataset_dir = "/path/to/yelp_review_dataset_dir" + >>> dataset = ds.YelpReviewDataset(dataset_dir=yelp_review_dataset_dir, usage='all') + + About YelpReview Dataset: + + The Yelp Review Full dataset consists of reviews from Yelp. It is extracted from the Yelp Dataset Challenge 2015 + data, and it is mainly used for text classification. + + The Yelp Review Polarity dataset is constructed from the above dataset, by considering stars 1 and 2 negative, and 3 + and 4 positive. + + The directory structures of these two datasets are the same. + You can unzip the dataset files into the following structure and read by MindSpore's API: + + .. code-block:: + + . + └── yelp_review_dir + ├── train.csv + ├── test.csv + └── readme.txt + + Citation: + + For Yelp Review Polarity: + + .. code-block:: + + @article{zhangCharacterlevelConvolutionalNetworks2015, + archivePrefix = {arXiv}, + eprinttype = {arxiv}, + eprint = {1509.01626}, + primaryClass = {cs}, + title = {Character-Level {{Convolutional Networks}} for {{Text Classification}}}, + abstract = {This article offers an empirical exploration on the use of character-level convolutional networks + (ConvNets) for text classification. We constructed several large-scale datasets to show that + character-level convolutional networks could achieve state-of-the-art or competitive results. + Comparisons are offered against traditional models such as bag of words, n-grams and their TFIDF + variants, and deep learning models such as word-based ConvNets and recurrent neural networks.}, + journal = {arXiv:1509.01626 [cs]}, + author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + month = sep, + year = {2015}, + } + + Citation: + + For Yelp Review Full: + + .. code-block:: + + @article{zhangCharacterlevelConvolutionalNetworks2015, + archivePrefix = {arXiv}, + eprinttype = {arxiv}, + eprint = {1509.01626}, + primaryClass = {cs}, + title = {Character-Level {{Convolutional Networks}} for {{Text Classification}}}, + abstract = {This article offers an empirical exploration on the use of character-level convolutional networks + (ConvNets) for text classification. We constructed several large-scale datasets to show that + character-level convolutional networks could achieve state-of-the-art or competitive results. 
+ Comparisons are offered against traditional models such as bag of words, n-grams and their TFIDF + variants, and deep learning models such as word-based ConvNets and recurrent neural networks.}, + journal = {arXiv:1509.01626 [cs]}, + author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann}, + month = sep, + year = {2015}, + } + """ + + @check_yelp_review_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, + shard_id=None, num_parallel_workers=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, 'all') + + def parse(self, children=None): + return cde.YelpReviewNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, + self.num_shards, self.shard_id) diff --git a/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py b/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py new file mode 100644 index 00000000000..027d75e32dd --- /dev/null +++ b/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py @@ -0,0 +1,898 @@ +# Copyright 2019-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +This dataset module supports various formats of datasets, including ImageNet, TFData, +MNIST, Cifar10/100, Manifest, MindRecord, and more. This module loads data with +high performance and parses data precisely. Some of the operations that are +provided to users to preprocess data include shuffle, batch, repeat, map, and zip. +""" +import builtins +import math +import os +import signal +import time +import multiprocessing +from multiprocessing.util import Finalize +import queue +from functools import partial +import threading +import weakref +import platform +import psutil +import numpy as np + +import mindspore._c_dataengine as cde + +from mindspore.common import Tensor +from mindspore import log as logger + +from .datasets import MappableDataset, TextBaseDataset, Schema, to_list, _watch_dog, _check_shm_usage +from . import samplers +from .queue import _SharedQueue +from .validators import check_generatordataset, check_numpyslicesdataset, check_paddeddataset +from ..core.config import get_enable_shared_mem, get_prefetch_size +from ..core.datatypes import mstypelist_to_detypelist +from ..core.py_util_helpers import ExceptionHandler + + +def _iter_fn(dataset, num_samples): + """ + Generator function wrapper for iterable dataset. 
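+
+    Illustrative behaviour (sketch only, assuming a plain Python iterable such as a list of tuples):
+    with num_samples=2, only the first two rows are pulled from iter(dataset) and each row is
+    converted to a tuple of NumPy arrays by _convert_row, e.g.
+    list(_iter_fn([(1,), (2,), (3,)], 2)) yields two converted rows.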
+ """ + if num_samples is not None and num_samples != 0: + ds_iter = iter(dataset) + for _ in range(num_samples): + try: + val = next(ds_iter) + except StopIteration: + return + # convert output tensors to ndarrays + yield _convert_row(val) + else: + for val in dataset: + # convert output tensors to ndarrays + yield _convert_row(val) + + +def _generator_fn(generator, num_samples): + """ + Generator function wrapper for generator function dataset. + """ + if num_samples is not None and num_samples != 0: + gen_iter = generator() + for _ in range(num_samples): + try: + val = next(gen_iter) + except StopIteration: + return + yield val + else: + gen_iter = generator() + for val in gen_iter: + yield val + + +def _cpp_sampler_fn(sample_ids, dataset): + """ + Generator function wrapper for mappable dataset with cpp sampler. + """ + if not isinstance(sample_ids, np.ndarray): + raise RuntimeError("Sample IDs are not in a numpy array.") + if sample_ids.size == 0: + raise RuntimeError("Sampler passed an empty sample IDs list.") + + for i in sample_ids: + val = dataset[i] + # convert output tensors to ndarrays + yield _convert_row(val) + + +def _cpp_sampler_fn_mp(sample_ids, sample_fn): + """ + Multiprocessing generator function wrapper for mappable dataset with cpp sampler. + """ + if not isinstance(sample_ids, np.ndarray): + raise RuntimeError("Sample IDs are not in a numpy array.") + if sample_ids.size == 0: + raise RuntimeError("Sampler passed an empty sample IDs list.") + + return sample_fn.process(sample_ids) + + +def _fill_worker_indices(workers, indices, idx): + """ + Worker index queue filler, fill worker index queue in round robin order. + """ + num_worker = len(workers) + while idx < len(indices): + try: + workers[idx % num_worker].put(indices[idx]) + idx += 1 + except queue.Full: + break + return idx + + +def _convert_row(row): + """ + Convert Op return value to numpy + """ + value = [] + if isinstance(row, dict): + raise ValueError("Return value in user defined python function should be numpy array, but got dict.") + + # convert each column in row into numpy array + for x in row: + if isinstance(x, bytes): # got image bytes from a file + value.append(np.frombuffer(x, np.uint8)) + elif isinstance(x, Tensor): # got mindspore.Tensor + value.append(x.asnumpy()) + elif isinstance(x, dict): + raise ValueError("Return value in user defined python function should be numpy array, but got dict.") + else: + value.append(np.array(x, copy=False)) + return tuple(value) + + +class SamplerFn: + """ + Multiprocessing or multithread generator function wrapper master process. 
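+
+    Control-flow sketch (descriptive note): process() distributes sample indices to the per-worker
+    index queues in round-robin order via _fill_worker_indices, then reads each worker's res_queue
+    in the same order, so rows come back in the order of the input indices even though the workers
+    run in parallel (as threads, or as processes when multi_process is True).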
+ """ + + def __init__(self, dataset, num_worker, multi_process, max_rowsize): + self.workers = [] + self.num_worker = num_worker + self.multi_process = multi_process + self.need_join = False + self.ppid = os.getpid() + self.pids = [] + self.check_interval = 300 # the interval of check queue's size + self._final_join = True + + # Event for end of epoch + if multi_process is True: + try: + self.eof = multiprocessing.Event() + except Exception: + raise RuntimeError("Init multiprocessing.Event() failed, This might be caused by insufficient shm," + + " and the recommended shm size is at least 5 GB.") + else: + self.eof = threading.Event() + # Create workers + + # get default queue size and adjust queuesize per worker if there are large # workers + queue_size = get_prefetch_size() + queue_size = min(queue_size, queue_size * 4 // num_worker) + queue_size = max(2, queue_size) + + if multi_process and get_enable_shared_mem(): + _check_shm_usage(num_worker, queue_size, max_rowsize) + for _ in range(num_worker): + if multi_process is True: + try: + worker = _GeneratorWorkerMp(dataset, self.eof, max_rowsize, queue_size) + except Exception: + raise RuntimeError("Init multiprocessing.Queue() failed, This might be caused by insufficient shm," + + " and the recommended shm size is at least 5 GB.") + worker.daemon = True + # When multi processes fork a subprocess, the lock of the main process is copied to the subprocess, + # which may cause deadlock. Therefore, the subprocess startup is performed in che initialization phase. + # In this phase, the main process is not locked. + worker.start() + self.pids.append(worker.pid) + self.need_join = True + else: + worker = _GeneratorWorkerMt(dataset, self.eof) + worker.daemon = True + self.workers.append(worker) + if multi_process is True and platform.system().lower() != 'windows': + self.eot = threading.Event() + self.watch_dog = threading.Thread(target=_watch_dog, args=(self.eot, self.workers)) + self.watch_dog.daemon = True + self.watch_dog.start() + + if self._final_join is True: + self._jointhread = Finalize( + self.watch_dog, self._finalize_join, + args=(weakref.ref(self.watch_dog), self.eot), + exitpriority=-5 + ) + + def process(self, indices): + """ + The main process, start the child process or child thread, and fill the index queue. + Get the result and return. + """ + for w in self.workers: + # Check whether the queue of the subprocess is empty. + if not w.queue_empty(): + raise Exception("The queue of the subprocess is not empty.") + # Start all workers + if not w.is_alive(): + w.start() + + # Fill initial index queues + idx_cursor = 0 + idx_cursor = _fill_worker_indices(self.workers, indices, idx_cursor) + + # Fetch results + for i in range(len(indices)): + if self.eof.is_set(): + self._stop_subprocess() + return + if self.multi_process is True and not psutil.pid_exists(self.workers[i % self.num_worker].pid): + self._stop_subprocess() + return + # Fetch result and put index + try: + # To avoid get timeout from queue, check the res_queue size. 
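+                # Poll res_queue here instead of relying on a single blocking get(), so that a slow
+                # user-defined __getitem__ does not trip the worker queue's 30s get() timeout;
+                # check_interval (300s) only controls how often the warning below is emitted.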
+                start_time = int(time.time())
+                wait_count = 1
+                while self.workers[i % self.num_worker].res_queue.empty():
+                    time.sleep(0.1)
+                    cost_time = int(time.time()) - start_time
+                    if cost_time / self.check_interval >= wait_count:
+                        wait_count += 1
+                        logger.warning("It has been waiting for " + str(cost_time) + "s because the generator "
+                                       "worker thread/process that produces the data appears to be hung "
+                                       "(for example, blocked by the GIL).")
+
+                result = self.workers[i % self.num_worker].get()
+                if isinstance(result, ExceptionHandler):
+                    result.reraise()
+            except queue.Empty:
+                self._stop_subprocess()
+                raise Exception("Generator worker process timeout.")
+            except KeyboardInterrupt:
+                self._stop_subprocess()
+                raise Exception("Generator worker receives KeyboardInterrupt.")
+            if self.eof.is_set():
+                self._stop_subprocess()
+                return
+            if idx_cursor < len(indices):
+                idx_cursor = _fill_worker_indices(self.workers, indices, idx_cursor)
+            yield _convert_row(result)
+
+    def _stop_subprocess(self):
+        """Only the main process can call join."""
+        if self.need_join is True and self.ppid == os.getpid():
+            self.eof.set()
+            self.need_join = False
+            for w in self.workers:
+                if self.multi_process is True and hasattr(w, '_closed') and w._closed is False:  # pylint: disable=W0212
+                    w.join()
+            self._abort_watchdog()
+
+    def _abort_watchdog(self):
+        if hasattr(self, 'eot') and self.eot is not None and not self.eot.is_set():
+            self.eot.set()
+
+    @classmethod
+    def _finalize_join(cls, twr, eot):
+        thread = twr()
+        if thread is not None:
+            if eot is not None and not eot.is_set():
+                eot.set()
+            thread.join()
+
+    def __del__(self):
+        self._stop_subprocess()
+
+
+def _subprocess_handle(eof, signum, frame):
+    # SIGTERM handler for worker processes: set the eof event from a helper thread.
+    threading.Thread(target=eof.set).start()
+
+
+def _generator_worker_loop(dataset, idx_queue, result_queue, eof, is_multiprocessing):
+    """
+    Multithread or multiprocess generator worker process loop.
+    """
+    if is_multiprocessing:
+        signal.signal(signal.SIGTERM, partial(_subprocess_handle, eof))
+    while True:
+        # Fetch index, block
+        try:
+            idx = idx_queue.get(timeout=1)
+        except KeyboardInterrupt:
+            if is_multiprocessing:
+                eof.set()
+                idx_queue.cancel_join_thread()
+                result_queue.cancel_join_thread()
+            raise Exception("Generator worker receives KeyboardInterrupt.")
+        except queue.Empty:
+            if eof.is_set():
+                if is_multiprocessing:
+                    idx_queue.cancel_join_thread()
+                    result_queue.cancel_join_thread()
+                return
+            # If end-of-file (eof) is not set, continue to get data from idx_queue
+            continue
+        if idx is None:
+            # When the queue is out of scope from master process, a None item can be fetched from the queue.
+            # Upon receiving None, worker process should check if eof is set.
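+            # Note: a None index while eof is still cleared indicates the master process went away
+            # unexpectedly, so the worker raises instead of waiting on the queue forever.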
+ if not eof.is_set(): + raise Exception("") + return + if eof.is_set(): + if is_multiprocessing: + idx_queue.cancel_join_thread() + result_queue.cancel_join_thread() + return + # Fetch data, any exception from __getitem__ will terminate worker and timeout master process + try: + result = dataset[idx] + except Exception: # pylint: disable=broad-except + result = ExceptionHandler(where="in GeneratorDataset worker process") + # Send data, block + while True: + try: + result_queue.put(result, timeout=5) + except KeyboardInterrupt: + if is_multiprocessing: + eof.set() + idx_queue.cancel_join_thread() + result_queue.cancel_join_thread() + raise Exception("Generator worker receives KeyboardInterrupt.") + except queue.Full: + if eof.is_set(): + if is_multiprocessing: + idx_queue.cancel_join_thread() + result_queue.cancel_join_thread() + return + # If eof is not set, continue to put data to result_queue + continue + break + del result, idx + + +class _GeneratorWorkerMt(threading.Thread): + """ + Worker process for multi-thread Generator. + """ + + def __init__(self, dataset, eof): + self.idx_queue = queue.Queue(16) + self.res_queue = queue.Queue(16) + super().__init__(target=_generator_worker_loop, args=(dataset, self.idx_queue, self.res_queue, eof, False)) + + def put(self, item): + """ + Put function for worker index queue. Never block. Raise queue.Full on failure. + """ + self.idx_queue.put_nowait(item) + + def get(self): + """ + Get function for worker result queue. Block with timeout. + """ + return self.res_queue.get(timeout=30) + + def queue_empty(self): + if not self.idx_queue.empty(): + logger.warning("idx_queue is not empty") + return False + if not self.res_queue.empty(): + logger.warning("res_queue is not empty") + return False + return True + + +class _GeneratorWorkerMp(multiprocessing.Process): + """ + Worker process for multiprocess Generator. + """ + + def __init__(self, dataset, eof, max_rowsize, queue_size): + self.idx_queue = multiprocessing.Queue(queue_size) + if get_enable_shared_mem(): + self.res_queue = _SharedQueue(queue_size, max_rowsize=max_rowsize) + else: + self.res_queue = multiprocessing.Queue(queue_size) + self.idx_queue._joincancelled = True # pylint: disable=W0212 + self.res_queue._joincancelled = True # pylint: disable=W0212 + super().__init__(target=_generator_worker_loop, args=(dataset, self.idx_queue, self.res_queue, eof, True)) + + def put(self, item): + """ + Put function for worker index queue. Never block. Raise queue.Full on failure. + """ + self.idx_queue.put_nowait(item) + + def get(self): + """ + Get function for worker result queue. Block with timeout. + """ + # Relax 10s to 30s, since it sometimes will cause "Generator worker process timeout" + # when we run too many iterators with infinite epoch(num_epoch=-1) + return self.res_queue.get(timeout=30) + + def queue_empty(self): + if not self.idx_queue.empty(): + logger.warning("idx_queue is not empty.") + return False + if not self.res_queue.empty(): + logger.warning("res_queue is not empty.") + return False + return True + + +class GeneratorDataset(MappableDataset, TextBaseDataset): + """ + A source dataset that generates data from Python by invoking Python data source each epoch. + + The column names and column types of generated dataset depend on Python data defined by users. + + Args: + source (Union[Callable, Iterable, Random Accessible]): + A generator callable object, an iterable Python object or a random accessible Python object. 
+ Callable source is required to return a tuple of NumPy arrays as a row of the dataset on source().next(). + Iterable source is required to return a tuple of NumPy arrays as a row of the dataset on + iter(source).next(). + Random accessible source is required to return a tuple of NumPy arrays as a row of the dataset on + source[idx]. + column_names (Union[str, list[str]], optional): List of column names of the dataset (default=None). Users are + required to provide either column_names or schema. + column_types (list[mindspore.dtype], optional): List of column data types of the dataset (default=None). + If provided, sanity check will be performed on generator output. + schema (Union[Schema, str], optional): Path to the JSON schema file or schema object (default=None). Users are + required to provide either column_names or schema. If both are provided, schema will be used. + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, all images). + num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. + (default=None, expected order behavior shown in the table). + sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible + input is required (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + Random accessible input is required. When this argument is specified, `num_samples` reflects the maximum + sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only + when num_shards is also specified. Random accessible input is required. + python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This + option could be beneficial if the Python operation is computational heavy (default=True). + max_rowsize(int, optional): Maximum size of row in MB that is used for shared memory allocation to copy + data between processes. This is only used if python_multiprocessing is set to True (default 6 MB). + + Raises: + RuntimeError: If source raises an exception during execution. + RuntimeError: If len of column_names does not match output len of source. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - Input `source` accept user defined Python function(PyFuncs), Do not add network computing operators from + mindspore.nn and mindspore.ops or others into this `source`. + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> import numpy as np + >>> + >>> # 1) Multidimensional generator function as callable input. + >>> def generator_multidimensional(): + ... for i in range(64): + ... yield (np.array([[i, i + 1], [i + 2, i + 3]]),) + >>> + >>> dataset = ds.GeneratorDataset(source=generator_multidimensional, column_names=["multi_dimensional_data"]) + >>> + >>> # 2) Multi-column generator function as callable input. + >>> def generator_multi_column(): + ... for i in range(64): + ... yield np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]) + >>> + >>> dataset = ds.GeneratorDataset(source=generator_multi_column, column_names=["col1", "col2"]) + >>> + >>> # 3) Iterable dataset as iterable input. + >>> class MyIterable: + ... def __init__(self): + ... self._index = 0 + ... self._data = np.random.sample((5, 2)) + ... self._label = np.random.sample((5, 1)) + ... + ... def __next__(self): + ... if self._index >= len(self._data): + ... raise StopIteration + ... else: + ... item = (self._data[self._index], self._label[self._index]) + ... self._index += 1 + ... return item + ... + ... def __iter__(self): + ... self._index = 0 + ... return self + ... + ... def __len__(self): + ... return len(self._data) + >>> + >>> dataset = ds.GeneratorDataset(source=MyIterable(), column_names=["data", "label"]) + >>> + >>> # 4) Random accessible dataset as random accessible input. + >>> class MyAccessible: + ... def __init__(self): + ... self._data = np.random.sample((5, 2)) + ... self._label = np.random.sample((5, 1)) + ... + ... def __getitem__(self, index): + ... return self._data[index], self._label[index] + ... + ... def __len__(self): + ... return len(self._data) + >>> + >>> dataset = ds.GeneratorDataset(source=MyAccessible(), column_names=["data", "label"]) + >>> + >>> # list, dict, tuple of Python is also random accessible + >>> dataset = ds.GeneratorDataset(source=[(np.array(0),), (np.array(1),), (np.array(2),)], column_names=["col"]) + """ + + @check_generatordataset + def __init__(self, source, column_names=None, column_types=None, schema=None, num_samples=None, + num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None, + python_multiprocessing=True, max_rowsize=6): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id) + if isinstance(source, builtins.zip): + # Although zip is iteratable, it does not have the feature of repeated iteration, so pass it to the array. + self.source = [item for item in source] + else: + self.source = source + self.prepared_source = None # source to be sent to C++ + if hasattr(self, 'operator_mixed') and getattr(self, 'operator_mixed') is True: + self.num_parallel_workers = 1 + logger.warning( + "Input 'source' of 'GeneratorDataset' includes network computing operators like in mindspore.nn, " + "mindspore.ops, mindspore.numpy module and etc, which do not support multi-thread compiling, recommend" + " to replace it with python implemented operator like numpy etc. 
Here decrease 'num_parallel_workers' " + "into 1.") + + self.python_multiprocessing = python_multiprocessing + + self.column_names = to_list(column_names) + + if column_types is not None: + self.column_types = mstypelist_to_detypelist(column_types) + else: + self.column_types = [] + + self.schema = schema + if schema is not None: + self.schema = schema + if not isinstance(schema, Schema): + self.schema = Schema(schema) + # Move get dataset_size by len from parse to here, because self.source will + # lose attribution of '__len__' after deepcopy. + self.source_len = -1 # unknown + if hasattr(self.source, "__len__"): + self.source_len = len(self.source) + + self.max_rowsize = max_rowsize + self.sample_fn = None + + def __deepcopy__(self, memodict): + if id(self) in memodict: + return memodict[id(self)] + new_op = self.__safe_deepcopy__(memodict, exclude=("source", "__transfer_dataset__")) + + sample_fn = None + if new_op.sampler is not None and hasattr(self.source, "__getitem__"): + # The reason why there is a try catch here is because when the new op is being constructed with shared + # memory enabled, there will be an exception thrown if there is not enough shared memory available + if self.source_len == -1: + raise RuntimeError("Attempt to construct a random access dataset, '__len__' method is required!") + try: + if new_op.num_parallel_workers > 1: + self.__validate_memory_usage() + + sample_fn = SamplerFn(self.source, new_op.num_parallel_workers, self.python_multiprocessing, + self.max_rowsize) + new_op.prepared_source = (lambda sample_ids: _cpp_sampler_fn_mp(sample_ids, sample_fn)) + else: + new_op.prepared_source = (lambda sample_ids: _cpp_sampler_fn(sample_ids, self.source)) + new_op.sample_fn = sample_fn + except RuntimeError as e: + raise Exception(str(e)) + else: + try: + new_op.sampler = None + new_op.sample_fn = sample_fn + new_op.source_len = min(new_op.source_len, + new_op.num_samples) if new_op.num_samples != 0 else new_op.source_len + iter(self.source) + except TypeError: + # Use generator function if input callable + new_op.prepared_source = (lambda: _generator_fn(self.source, new_op.num_samples)) + else: + # Use iterator function if input is iterable + # Random accessible input is also iterable + new_op.prepared_source = (lambda: _iter_fn(self.source, new_op.num_samples)) + + return new_op + + def is_shuffled(self): + return self.sampler.is_shuffled() + + def is_sharded(self): + return self.sampler.is_sharded() + + def parse(self, children=None): + if self.schema is None: + return cde.GeneratorNode(self.prepared_source, self.column_names, self.column_types, self.source_len, + self.sampler, self.num_parallel_workers) + schema = self.schema + if isinstance(schema, Schema): + schema = self.schema.cpp_schema + return cde.GeneratorNode(self.prepared_source, schema, self.source_len, self.sampler, + self.num_parallel_workers) + + def __validate_memory_usage(self): + """ + Check memory usage when mulit-processing mode, when 85% prompt warning and 100% raise error. 
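+        The estimate is deliberately rough: the current process RSS multiplied by num_parallel_workers
+        and the number of shards is compared against free system memory, since every forked worker may
+        duplicate the parent process's memory.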
+ """ + if self.python_multiprocessing: + # if use num_parallel_workers is to large when python_multiprocessing=True which would cause + # OOM error get the num_shards + valid_num_shards = 1 + if isinstance(self.sampler, samplers.DistributedSampler): + valid_num_shards = self.sampler.num_shards + elif self.num_shards is not None: + valid_num_shards = self.num_shards + + # get process memory usage + process = psutil.Process(os.getpid()) + process_memory = process.memory_info().rss + sys_memory_free = psutil.virtual_memory().free + + total_memory_maybe_used = process_memory * self.num_parallel_workers * valid_num_shards + if total_memory_maybe_used / sys_memory_free > 0.85: + valid_num_worker = math.floor(sys_memory_free * 0.85 / valid_num_shards / process_memory) + valid_num_worker = 1 if valid_num_worker <= 0 else valid_num_worker + info = "GeneratorDataset num_parallel_workers: " + str(self.num_parallel_workers) + \ + " is too large which maybe cause a lot of memory occupation (>85%) or out of memory(OOM) " \ + "during multi process running. Therefore, it is recommended to reduce num_parallel_workers to " \ + + str(valid_num_worker) + " or smaller." + logger.warning(info) + + +class _NumpySlicesDataset: + """ + Mainly for dealing with several kinds of formats of Python data, and return one row each time. + """ + + def __init__(self, data, column_list=None): + self.column_list = None + # Convert dict data into tuple + if isinstance(data, dict): + data = self.process_dict(data) + + if isinstance(data, tuple): + self.data = () + data_len = len(data) + for i in range(data_len): + self.data = self.data + (np.array(data[i]),) + else: + self.data = (np.array(data),) + + # check whether the data length in each column is equal + data_len = [len(data_item) for data_item in self.data] + if data_len[1:] != data_len[:-1]: + raise ValueError("Data length in each column is not equal.") + + # Init column_name + if column_list is not None: + self.column_list = column_list + elif self.column_list is None: + self.column_list = [] + column_num = len(self.data) + for i in range(column_num): + self.column_list.append("column_" + str(i)) + + def __getitem__(self, index): + data_row = [d[index, ...] for d in self.data] + data_res = tuple(data_row) + return data_res + + def __len__(self): + return len(self.data[0]) + + def process_dict(self, input_data): + """ + Convert the dict like data into tuple format, when input is a tuple of dicts then compose it into a dict first. + """ + # Convert pandas like dict(has "values" column) into General dict + data_keys = list(input_data.keys()) + data_col = input_data[data_keys[0]] + if hasattr(data_col, "values"): + new_dict = {} + for key in data_keys: + item1 = input_data.pop(key) + new_dict[key] = item1.values + input_data = new_dict + + # Convert the data in dict into tuple + data = () + keys = list(input_data.keys()) + self.column_list = keys + for key in keys: + value = input_data[key] + data = data + (list(value),) + + return data + + +class NumpySlicesDataset(GeneratorDataset): + """ + Creates a dataset with given data slices, mainly for loading Python data into dataset. + + The column names and column types of generated dataset depend on Python data defined by users. + + Args: + data (Union[list, tuple, dict]) Input of given data. Supported data types include: list, tuple, dict and other + NumPy formats. 
Input data will be sliced along the first dimension and generate additional rows, if input is + list, there will be one column in each row, otherwise there tends to be multi columns. Large data is not + recommended to be loaded in this way as data is loading into memory. + column_names (list[str], optional): List of column names of the dataset (default=None). If column_names is not + provided, the output column names will be named as the keys of dict when the input data is a dict, + otherwise they will be named like column_0, column_1 ... + num_samples (int, optional): The number of samples to be included in the dataset (default=None, all samples). + num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. + (default=None, expected order behavior shown in the table). + sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible + input is required (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + Random accessible input is required. When this argument is specified, `num_samples` reflects the max + sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only + when num_shards is also specified. Random accessible input is required. + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Raises: + RuntimeError: If len of column_names does not match output len of data. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). 
+ + Examples: + >>> # 1) Input data can be a list + >>> data = [1, 2, 3] + >>> dataset = ds.NumpySlicesDataset(data=data, column_names=["column_1"]) + >>> + >>> # 2) Input data can be a dictionary, and column_names will be its keys + >>> data = {"a": [1, 2], "b": [3, 4]} + >>> dataset = ds.NumpySlicesDataset(data=data) + >>> + >>> # 3) Input data can be a tuple of lists (or NumPy arrays), each tuple element refers to data in each column + >>> data = ([1, 2], [3, 4], [5, 6]) + >>> dataset = ds.NumpySlicesDataset(data=data, column_names=["column_1", "column_2", "column_3"]) + >>> + >>> # 4) Load data from CSV file + >>> import pandas as pd + >>> df = pd.read_csv(filepath_or_buffer=csv_dataset_dir[0]) + >>> dataset = ds.NumpySlicesDataset(data=dict(df), shuffle=False) + """ + + @check_numpyslicesdataset + def __init__(self, data, column_names=None, num_samples=None, num_parallel_workers=1, shuffle=None, sampler=None, + num_shards=None, shard_id=None): + dataset = _NumpySlicesDataset(data, column_names) + super().__init__(dataset, column_names=dataset.column_list, num_samples=num_samples, + num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, + num_shards=num_shards, shard_id=shard_id) + + +class _PaddedDataset: + """ + Mainly for combining false samples provided by users into a dataset. + + Args: + padded_samples (list(dict)): Data provided by user to be added to the initial Dataset. + """ + + def __init__(self, padded_samples): + self.column_names = list(padded_samples[0].keys()) + self.padded_samples = padded_samples + + def __getitem__(self, item): + return (self.padded_samples[item][key] for key in self.column_names) + + def __len__(self): + return len(self.padded_samples) + + +class PaddedDataset(GeneratorDataset): + """ + Creates a dataset with filler data provided by user. Mainly used to add to the original data set + and assign it to the corresponding shard. + + Args: + padded_samples (list(dict)): Samples provided by user. + + Raises: + TypeError: If padded_samples is not an instance of list. + TypeError: If the element of padded_samples is not an instance of dict. + ValueError: If the padded_samples is empty. + + Examples: + >>> import numpy as np + >>> data = [{'image': np.zeros(1, np.uint8)}, {'image': np.zeros(2, np.uint8)}] + >>> dataset = ds.PaddedDataset(padded_samples=data) + """ + + @check_paddeddataset + def __init__(self, padded_samples): + dataset = _PaddedDataset(padded_samples) + super().__init__(dataset, column_names=dataset.column_names, num_shards=None, shard_id=None, shuffle=False) + self._dataset_size = len(dataset.padded_samples) + self.padded_samples = padded_samples diff --git a/mindspore/python/mindspore/dataset/engine/datasets_vision.py b/mindspore/python/mindspore/dataset/engine/datasets_vision.py new file mode 100644 index 00000000000..e82f4c8dd92 --- /dev/null +++ b/mindspore/python/mindspore/dataset/engine/datasets_vision.py @@ -0,0 +1,4235 @@ +# Copyright 2019-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +This dataset module supports various formats of datasets, including ImageNet, TFData, +MNIST, Cifar10/100, Manifest, MindRecord, and more. This module loads data with +high performance and parses data precisely. Some of the operations that are +provided to users to preprocess data include shuffle, batch, repeat, map, and zip. +""" +import os +import numpy as np +from scipy.io import loadmat +from PIL import Image + +import mindspore._c_dataengine as cde + +from .datasets import MappableDataset, SourceDataset, Shuffle, Schema +from .datasets_user_defined import GeneratorDataset +from .validators import check_imagefolderdataset, \ + check_mnist_cifar_dataset, check_manifestdataset, check_vocdataset, check_cocodataset, \ + check_celebadataset, check_flickr_dataset, check_sb_dataset, check_flowers102dataset, check_cityscapes_dataset, \ + check_usps_dataset, check_div2k_dataset, check_random_dataset, \ + check_sbu_dataset, check_qmnist_dataset, check_emnist_dataset, check_fake_image_dataset, check_places365_dataset, \ + check_photo_tour_dataset, check_svhn_dataset, check_stl10_dataset, check_semeion_dataset, \ + check_caltech101_dataset, check_caltech256_dataset, check_wider_face_dataset + +from ..core.validator_helpers import replace_none + + +class _Caltech101Dataset: + """ + Mainly for loading Caltech101 Dataset, and return two rows each time. + """ + + def __init__(self, dataset_dir, target_type="category", decode=False): + self.dataset_dir = os.path.realpath(dataset_dir) + self.image_dir = os.path.join(self.dataset_dir, "101_ObjectCategories") + self.annotation_dir = os.path.join(self.dataset_dir, "Annotations") + self.target_type = target_type + if self.target_type == "category": + self.column_names = ["image", "category"] + elif self.target_type == "annotation": + self.column_names = ["image", "annotation"] + else: + self.column_names = ["image", "category", "annotation"] + self.decode = decode + self.classes = sorted(os.listdir(self.image_dir)) + if "BACKGROUND_Google" in self.classes: + self.classes.remove("BACKGROUND_Google") + name_map = {"Faces": "Faces_2", + "Faces_easy": "Faces_3", + "Motorbikes": "Motorbikes_16", + "airplanes": "Airplanes_Side_2"} + self.annotation_classes = [name_map[class_name] if class_name in name_map else class_name + for class_name in self.classes] + self.image_index = [] + self.image_label = [] + for i, image_class in enumerate(self.classes): + sub_dir = os.path.join(self.image_dir, image_class) + if not os.path.isdir(sub_dir) or not os.access(sub_dir, os.R_OK): + continue + num_images = len(os.listdir(sub_dir)) + self.image_index.extend(range(1, num_images + 1)) + self.image_label.extend(num_images * [i]) + + def __getitem__(self, index): + image_file = os.path.join(self.image_dir, self.classes[self.image_label[index]], + "image_{:04d}.jpg".format(self.image_index[index])) + if not os.path.exists(image_file): + raise ValueError("The image file {} does not exist or permission denied!".format(image_file)) + if self.decode: + image = np.asarray(Image.open(image_file).convert("RGB")) + else: + image = np.fromfile(image_file, dtype=np.uint8) + + if self.target_type == "category": + return image, self.image_label[index] + annotation_file = os.path.join(self.annotation_dir, self.annotation_classes[self.image_label[index]], + 
"annotation_{:04d}.mat".format(self.image_index[index])) + if not os.path.exists(annotation_file): + raise ValueError("The annotation file {} does not exist or permission denied!".format(annotation_file)) + annotation = loadmat(annotation_file)["obj_contour"] + + if self.target_type == "annotation": + return image, annotation + return image, self.image_label[index], annotation + + def __len__(self): + return len(self.image_index) + + +class Caltech101Dataset(GeneratorDataset): + """ + A source dataset that reads and parses Caltech101 dataset. + + The columns of the generated dataset depend on the value of `target_type`. + When `target_type` is `category`, the columns are :py:obj:`[image, category]`. + When `target_type` is `annotation`, the columns are :py:obj:`[image, annotation]`. + When `target_type` is `all`, the columns are :py:obj:`[image, category, annotation]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`category` is of the uint32 type. + The tensor of column :py:obj:`annotation` is a 2-dimensional ndarray that stores the contour of the image + and consists of a series of points. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. This root directory contains two + subdirectories, one is called 101_ObjectCategories, which stores images, + and the other is called Annotations, which stores annotations. + target_type (str, optional): Target of the image. If target_type is "category", return category represents + the target class. If target_type is "annotation", return annotation. + If target_type is "all", return category and annotation (default=None, means "category"). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data (default=1). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + decode (bool, optional): Whether or not to decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If target_type is not set correctly. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> caltech101_dataset_directory = "/path/to/caltech101_dataset_directory" + >>> + >>> # 1) Read all samples (image files) in caltech101_dataset_directory with 8 threads + >>> dataset = ds.Caltech101Dataset(dataset_dir=caltech101_dataset_directory, num_parallel_workers=8) + >>> + >>> # 2) Read all samples (image files) with the target_type "annotation" + >>> dataset = ds.Caltech101Dataset(dataset_dir=caltech101_dataset_directory, target_type="annotation") + + About Caltech101Dataset: + + Pictures of objects belonging to 101 categories. About 40 to 800 images per category. + Most categories have about 50 images. Collected in September 2003 by Fei-Fei Li, Marco Andreetto, + and Marc 'Aurelio Ranzato. The size of each image is roughly 300 x 200 pixels. + The official provides the contour data of each object in each picture, which is the annotation. + + .. code-block:: + + . + └── caltech101_dataset_directory + ├── 101_ObjectCategories + │ ├── Faces + │ │ ├── image_0001.jpg + │ │ ├── image_0002.jpg + │ │ ... + │ ├── Faces_easy + │ │ ├── image_0001.jpg + │ │ ├── image_0002.jpg + │ │ ... + │ ├── ... + └── Annotations + ├── Airplanes_Side_2 + │ ├── annotation_0001.mat + │ ├── annotation_0002.mat + │ ... + ├── Faces_2 + │ ├── annotation_0001.mat + │ ├── annotation_0002.mat + │ ... + ├── ... + + Citation: + + .. code-block:: + + @article{FeiFei2004LearningGV, + author = {Li Fei-Fei and Rob Fergus and Pietro Perona}, + title = {Learning Generative Visual Models from Few Training Examples: + An Incremental Bayesian Approach Tested on 101 Object Categories}, + journal = {Computer Vision and Pattern Recognition Workshop}, + year = {2004}, + url = {http://www.vision.caltech.edu/Image_Datasets/Caltech101/}, + } + """ + + @check_caltech101_dataset + def __init__(self, dataset_dir, target_type=None, num_samples=None, num_parallel_workers=1, + shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): + self.dataset_dir = dataset_dir + self.target_type = replace_none(target_type, "category") + self.decode = replace_none(decode, False) + dataset = _Caltech101Dataset(self.dataset_dir, self.target_type, self.decode) + super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, + num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, + num_shards=num_shards, shard_id=shard_id) + + def get_class_indexing(self): + """ + Get the class index. + + Returns: + dict, a str-to-int mapping from label name to index. 
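+
+        Examples:
+            >>> # Illustrative only; the directory path is a placeholder.
+            >>> dataset = ds.Caltech101Dataset(dataset_dir="/path/to/caltech101_dataset_directory")
+            >>> class_indexing = dataset.get_class_indexing()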
+ """ + class_dict = {'Faces': 0, 'Faces_easy': 1, 'Leopards': 2, 'Motorbikes': 3, 'accordion': 4, 'airplanes': 5, + 'anchor': 6, 'ant': 7, 'barrel': 8, 'bass': 9, 'beaver': 10, 'binocular': 11, 'bonsai': 12, + 'brain': 13, 'brontosaurus': 14, 'buddha': 15, 'butterfly': 16, 'camera': 17, 'cannon': 18, + 'car_side': 19, 'ceiling_fan': 20, 'cellphone': 21, 'chair': 22, 'chandelier': 23, + 'cougar_body': 24, 'cougar_face': 25, 'crab': 26, 'crayfish': 27, 'crocodile': 28, + 'crocodile_head': 29, 'cup': 30, 'dalmatian': 31, 'dollar_bill': 32, 'dolphin': 33, + 'dragonfly': 34, 'electric_guitar': 35, 'elephant': 36, 'emu': 37, 'euphonium': 38, 'ewer': 39, + 'ferry': 40, 'flamingo': 41, 'flamingo_head': 42, 'garfield': 43, 'gerenuk': 44, 'gramophone': 45, + 'grand_piano': 46, 'hawksbill': 47, 'headphone': 48, 'hedgehog': 49, 'helicopter': 50, 'ibis': 51, + 'inline_skate': 52, 'joshua_tree': 53, 'kangaroo': 54, 'ketch': 55, 'lamp': 56, 'laptop': 57, + 'llama': 58, 'lobster': 59, 'lotus': 60, 'mandolin': 61, 'mayfly': 62, 'menorah': 63, + 'metronome': 64, 'minaret': 65, 'nautilus': 66, 'octopus': 67, 'okapi': 68, 'pagoda': 69, + 'panda': 70, 'pigeon': 71, 'pizza': 72, 'platypus': 73, 'pyramid': 74, 'revolver': 75, + 'rhino': 76, 'rooster': 77, 'saxophone': 78, 'schooner': 79, 'scissors': 80, 'scorpion': 81, + 'sea_horse': 82, 'snoopy': 83, 'soccer_ball': 84, 'stapler': 85, 'starfish': 86, + 'stegosaurus': 87, 'stop_sign': 88, 'strawberry': 89, 'sunflower': 90, 'tick': 91, + 'trilobite': 92, 'umbrella': 93, 'watch': 94, 'water_lilly': 95, 'wheelchair': 96, 'wild_cat': 97, + 'windsor_chair': 98, 'wrench': 99, 'yin_yang': 100} + return class_dict + + +class Caltech256Dataset(MappableDataset): + """ + A source dataset that reads and parses Caltech256 dataset. + + The generated dataset has two columns: :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + decode (bool, optional): Whether or not to decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. 
+ RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> caltech256_dataset_dir = "/path/to/caltech256_dataset_directory" + >>> + >>> # 1) Read all samples (image files) in caltech256_dataset_dir with 8 threads + >>> dataset = ds.Caltech256Dataset(dataset_dir=caltech256_dataset_dir, num_parallel_workers=8) + + About Caltech256Dataset: + + Caltech-256 is an object recognition dataset containing 30,607 real-world images, of different sizes, + spanning 257 classes (256 object classes and an additional clutter class). + Each class is represented by at least 80 images. The dataset is a superset of the Caltech-101 dataset. + + .. code-block:: + + . + └── caltech256_dataset_directory + ├── 001.ak47 + │ ├── 001_0001.jpg + │ ├── 001_0002.jpg + │ ... + ├── 002.american-flag + │ ├── 002_0001.jpg + │ ├── 002_0002.jpg + │ ... + ├── 003.backpack + │ ├── 003_0001.jpg + │ ├── 003_0002.jpg + │ ... + ├── ... + + Citation: + + .. code-block:: + + @article{griffin2007caltech, + title = {Caltech-256 object category dataset}, + added-at = {2021-01-21T02:54:42.000+0100}, + author = {Griffin, Gregory and Holub, Alex and Perona, Pietro}, + biburl = {https://www.bibsonomy.org/bibtex/21f746f23ff0307826cca3e3be45f8de7/s364315}, + interhash = {bfe1e648c1778c04baa60f23d1223375}, + intrahash = {1f746f23ff0307826cca3e3be45f8de7}, + publisher = {California Institute of Technology}, + timestamp = {2021-01-21T02:54:42.000+0100}, + year = {2007} + } + """ + + @check_caltech256_dataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.Caltech256Node(self.dataset_dir, self.decode, self.sampler) + + +class CelebADataset(MappableDataset): + """ + A source dataset for reading and parsing CelebA dataset. + Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset. + + The generated dataset has two columns: :py:obj:`[image, attr]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`attr` is of the uint32 type and one hot encoded. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in + the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). 
+ usage (str, optional): Specify the `train`, `valid`, `test` part or `all` parts of dataset + (default= `all`, will read all samples). + sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). + decode (bool, optional): decode the images after reading (default=False). + extensions (list[str], optional): List of file extensions to be included in the dataset (default=None). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will include all images). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> celeba_dataset_dir = "/path/to/celeba_dataset_directory" + >>> + >>> # Read 5 samples from CelebA dataset + >>> dataset = ds.CelebADataset(dataset_dir=celeba_dataset_dir, usage='train', num_samples=5) + >>> + >>> # Note: In celeba dataset, each data dictionary owns keys "image" and "attr" + + About CelebA dataset: + + CelebFaces Attributes Dataset (CelebA) is a large-scale face attributes dataset + with more than 200K celebrity images, each with 40 attribute annotations. + + The images in this dataset cover large pose variations and background clutter. + CelebA has large diversities, large quantities, and rich annotations, including + + * 10,177 number of identities, + * 202,599 number of face images, + * 5 landmark locations, 40 binary attributes annotations per image. + + The dataset can be employed as the training and test sets for the following computer + vision tasks: face attribute recognition, face detection, landmark (or facial part) + localization, and face editing & synthesis. + + Original CelebA dataset structure: + + .. code-block:: + + . 
+ └── CelebA + ├── README.md + ├── Img + │ ├── img_celeba.7z + │ ├── img_align_celeba_png.7z + │ └── img_align_celeba.zip + ├── Eval + │ └── list_eval_partition.txt + └── Anno + ├── list_landmarks_celeba.txt + ├── list_landmarks_align_celeba.txt + ├── list_bbox_celeba.txt + ├── list_attr_celeba.txt + └── identity_CelebA.txt + + You can unzip the dataset files into the following structure and read by MindSpore's API. + + .. code-block:: + + . + └── celeba_dataset_directory + ├── list_attr_celeba.txt + ├── 000001.jpg + ├── 000002.jpg + ├── 000003.jpg + ├── ... + + Citation: + + .. code-block:: + + @article{DBLP:journals/corr/LiuLWT14, + author = {Ziwei Liu and Ping Luo and Xiaogang Wang and Xiaoou Tang}, + title = {Deep Learning Face Attributes in the Wild}, + journal = {CoRR}, + volume = {abs/1411.7766}, + year = {2014}, + url = {http://arxiv.org/abs/1411.7766}, + archivePrefix = {arXiv}, + eprint = {1411.7766}, + timestamp = {Tue, 10 Dec 2019 15:37:26 +0100}, + biburl = {https://dblp.org/rec/journals/corr/LiuLWT14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org}, + howpublished = {http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html} + } + """ + + @check_celebadataset + def __init__(self, dataset_dir, num_parallel_workers=None, shuffle=None, usage='all', sampler=None, decode=False, + extensions=None, num_samples=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.decode = replace_none(decode, False) + self.extensions = replace_none(extensions, []) + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + if self.usage != "all": + dataset_dir = os.path.realpath(self.dataset_dir) + partition_file = os.path.join(dataset_dir, "list_eval_partition.txt") + if os.path.exists(partition_file) is False: + raise RuntimeError("Partition file can not be found when usage is not 'all'.") + return cde.CelebANode(self.dataset_dir, self.usage, self.sampler, self.decode, self.extensions) + + + +class Cifar10Dataset(MappableDataset): + """ + A source dataset for reading and parsing Cifar10 dataset. + This api only supports parsing Cifar10 file in binary version now. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 50,000 + train samples, `test` will read from 10,000 test samples, `all` will read from all 60,000 samples + (default=None, all samples). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). 
When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> cifar10_dataset_dir = "/path/to/cifar10_dataset_directory" + >>> + >>> # 1) Get all samples from CIFAR10 dataset in sequence + >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, shuffle=False) + >>> + >>> # 2) Randomly select 350 samples from CIFAR10 dataset + >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_samples=350, shuffle=True) + >>> + >>> # 3) Get samples from CIFAR10 dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.Cifar10Dataset(dataset_dir=cifar10_dataset_dir, num_shards=2, shard_id=0) + >>> + >>> # In CIFAR10 dataset, each dictionary has keys "image" and "label" + + About CIFAR-10 dataset: + + The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, + with 6000 images per class. There are 50000 training images and 10000 test images. + The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. + + Here is the original CIFAR-10 dataset structure. + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── cifar-10-batches-bin + ├── data_batch_1.bin + ├── data_batch_2.bin + ├── data_batch_3.bin + ├── data_batch_4.bin + ├── data_batch_5.bin + ├── test_batch.bin + ├── readme.html + └── batches.meta.txt + + Citation: + + .. 
code-block::
+
+            @techreport{Krizhevsky09,
+            author = {Alex Krizhevsky},
+            title = {Learning multiple layers of features from tiny images},
+            institution = {},
+            year = {2009},
+            howpublished = {http://www.cs.toronto.edu/~kriz/cifar.html}
+            }
+    """
+
+    @check_mnist_cifar_dataset
+    def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
+                 sampler=None, num_shards=None, shard_id=None, cache=None):
+        super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
+                         shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
+
+        self.dataset_dir = dataset_dir
+        self.usage = replace_none(usage, "all")
+
+    def parse(self, children=None):
+        return cde.Cifar10Node(self.dataset_dir, self.usage, self.sampler)
+
+
+class Cifar100Dataset(MappableDataset):
+    """
+    A source dataset for reading and parsing Cifar100 dataset.
+
+    The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]`.
+    The tensor of column :py:obj:`image` is of the uint8 type.
+    The tensors of columns :py:obj:`coarse_label` and :py:obj:`fine_label` are each a scalar of the uint32 type.
+
+    Args:
+        dataset_dir (str): Path to the root directory that contains the dataset.
+        usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read from 50,000
+            train samples, `test` will read from 10,000 test samples, `all` will read from all 60,000 samples
+            (default=None, all samples).
+        num_samples (int, optional): The number of images to be included in the dataset
+            (default=None, all images).
+        num_parallel_workers (int, optional): Number of workers to read the data
+            (default=None, number set in the config).
+        shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
+            order behavior shown in the table).
+        sampler (Sampler, optional): Object used to choose samples from the
+            dataset (default=None, expected order behavior shown in the table).
+        num_shards (int, optional): Number of shards that the dataset will be divided
+            into (default=None). When this argument is specified, `num_samples` reflects
+            the maximum sample number of per shard.
+        shard_id (int, optional): The shard ID within num_shards (default=None). This
+            argument can only be specified when num_shards is also specified.
+        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+            (default=None, which means no cache is used).
+
+    Raises:
+        RuntimeError: If dataset_dir does not contain data files.
+        RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+        RuntimeError: If sampler and shuffle are specified at the same time.
+        RuntimeError: If sampler and sharding are specified at the same time.
+        RuntimeError: If num_shards is specified but shard_id is None.
+        RuntimeError: If shard_id is specified but num_shards is None.
+        ValueError: If shard_id is invalid (< 0 or >= num_shards).
+
+    Note:
+        - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
+          The table below shows what input arguments are allowed and their expected behavior.
+
+    .. 
list-table:: Expected Order Behavior of Using `sampler` and shuffle + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> cifar100_dataset_dir = "/path/to/cifar100_dataset_directory" + >>> + >>> # 1) Get all samples from CIFAR100 dataset in sequence + >>> dataset = ds.Cifar100Dataset(dataset_dir=cifar100_dataset_dir, shuffle=False) + >>> + >>> # 2) Randomly select 350 samples from CIFAR100 dataset + >>> dataset = ds.Cifar100Dataset(dataset_dir=cifar100_dataset_dir, num_samples=350, shuffle=True) + >>> + >>> # In CIFAR100 dataset, each dictionary has 3 keys: "image", "fine_label" and "coarse_label" + + About CIFAR-100 dataset: + + This dataset is just like the CIFAR-10, except it has 100 classes containing 600 images + each. There are 500 training images and 100 testing images per class. The 100 classes in + the CIFAR-100 are grouped into 20 superclasses. Each image comes with a "fine" label (the + class to which it belongs) and a "coarse" label (the superclass to which it belongs). + + Here is the original CIFAR-100 dataset structure. + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── cifar-100-binary + ├── train.bin + ├── test.bin + ├── fine_label_names.txt + └── coarse_label_names.txt + + Citation: + + .. code-block:: + + @techreport{Krizhevsky09, + author = {Alex Krizhevsky}, + title = {Learning multiple layers of features from tiny images}, + institution = {}, + year = {2009}, + howpublished = {http://www.cs.toronto.edu/~kriz/cifar.html} + } + """ + + @check_mnist_cifar_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.Cifar100Node(self.dataset_dir, self.usage, self.sampler) + + +class CityscapesDataset(MappableDataset): + """ + A source dataset for reading and parsing Cityscapes dataset. + + The generated dataset has two columns :py:obj:`[image, task]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`task` is of the uint8 type if task is not 'polygon' otherwise task is + a string tensor with serialize json. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str): Acceptable usages include `train`, `test`, `val` or `all` if quality_mode is `fine` + otherwise `train`, `train_extra`, `val` or `all` (default= `train`). + quality_mode (str): Acceptable quality_modes include `fine` or `coarse` (default= `fine`). + task (str): Acceptable tasks include `instance`, `semantic`, `polygon` or `color` (default= `instance`). + num_samples (int, optional): The number of images to be included in the dataset. + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). 
+ shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + decode (bool, optional): Decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir is invalid or does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If dataset_dir is not exist. + ValueError: If task is invalid. + ValueError: If quality_mode is invalid. + ValueError: If usage is invalid. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> cityscapes_dataset_dir = "/path/to/cityscapes_dataset_directory" + >>> + >>> # 1) Get all samples from Cityscapes dataset in sequence + >>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, task="instance", quality_mode="fine", + ... usage="train", shuffle=False, num_parallel_workers=1) + >>> + >>> # 2) Randomly select 350 samples from Cityscapes dataset + >>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, num_samples=350, shuffle=True, + ... num_parallel_workers=1) + >>> + >>> # 3) Get samples from Cityscapes dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, num_shards=2, shard_id=0, + ... num_parallel_workers=1) + >>> + >>> # In Cityscapes dataset, each dictionary has keys "image" and "task" + + About Cityscapes dataset: + + The Cityscapes dataset consists of 5000 colour images with high quality dense pixel annotations and + 19998 colour images with coarser polygonal annotations in 50 cities. There are 30 classes in this + dataset and the polygonal annotations include dense semantic segmentation and instance segmentation + for vehicle and people. + + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + Taking the quality_mode of `fine` as an example. + + .. code-block:: + + . 
+ └── Cityscapes + ├── leftImg8bit + | ├── train + | | ├── aachen + | | | ├── aachen_000000_000019_leftImg8bit.png + | | | ├── aachen_000001_000019_leftImg8bit.png + | | | ├── ... + | | ├── bochum + | | | ├── ... + | | ├── ... + | ├── test + | | ├── ... + | ├── val + | | ├── ... + └── gtFine + ├── train + | ├── aachen + | | ├── aachen_000000_000019_gtFine_color.png + | | ├── aachen_000000_000019_gtFine_instanceIds.png + | | ├── aachen_000000_000019_gtFine_labelIds.png + | | ├── aachen_000000_000019_gtFine_polygons.json + | | ├── aachen_000001_000019_gtFine_color.png + | | ├── aachen_000001_000019_gtFine_instanceIds.png + | | ├── aachen_000001_000019_gtFine_labelIds.png + | | ├── aachen_000001_000019_gtFine_polygons.json + | | ├── ... + | ├── bochum + | | ├── ... + | ├── ... + ├── test + | ├── ... + └── val + ├── ... + + Citation: + + .. code-block:: + + @inproceedings{Cordts2016Cityscapes, + title = {The Cityscapes Dataset for Semantic Urban Scene Understanding}, + author = {Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, + Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, + booktitle = {Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2016} + } + """ + + @check_cityscapes_dataset + def __init__(self, dataset_dir, usage="train", quality_mode="fine", task="instance", num_samples=None, + num_parallel_workers=None, shuffle=None, decode=None, sampler=None, num_shards=None, + shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.task = task + self.quality_mode = quality_mode + self.usage = usage + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.CityscapesNode(self.dataset_dir, self.usage, self.quality_mode, self.task, self.decode, self.sampler) + + +class CocoDataset(MappableDataset): + """ + A source dataset for reading and parsing COCO dataset. + + CocoDataset supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation and + Panoptic Segmentation of 2017 Train/Val/Test dataset. + + The generated dataset with different task setting has different output columns: + + - task = :py:obj:`Detection`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \ + :py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`. + - task = :py:obj:`Stuff`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[segmentation,dtype=float32]`, \ + :py:obj:`[iscrowd,dtype=uint32]`. + - task = :py:obj:`Keypoint`, output columns: :py:obj:`[image, dtype=uint8]`, \ + :py:obj:`[keypoints, dtype=float32]`, :py:obj:`[num_keypoints, dtype=uint32]`. + - task = :py:obj:`Panoptic`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \ + :py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`, :py:obj:`[area, dtype=uint32]`. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + annotation_file (str): Path to the annotation JSON file. + task (str, optional): Set the task type for reading COCO data. Supported task types: + `Detection`, `Stuff`, `Panoptic` and `Keypoint` (default= `Detection`). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). 
+ num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the configuration file). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + decode (bool, optional): Decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be + output at the end :py:obj:`[_meta-filename, dtype=string]` (default=False). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + RuntimeError: If parse JSON file failed. + ValueError: If task is not in [`Detection`, `Stuff`, `Panoptic`, `Keypoint`]. + ValueError: If annotation_file is not exist. + ValueError: If dataset_dir is not exist. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op is added + to remove the prefix('_meta-'). + - CocoDataset doesn't support PKSampler. + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> coco_dataset_dir = "/path/to/coco_dataset_directory/images" + >>> coco_annotation_file = "/path/to/coco_dataset_directory/annotation_file" + >>> + >>> # 1) Read COCO data for Detection task + >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, + ... annotation_file=coco_annotation_file, + ... task='Detection') + >>> + >>> # 2) Read COCO data for Stuff task + >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, + ... annotation_file=coco_annotation_file, + ... task='Stuff') + >>> + >>> # 3) Read COCO data for Panoptic task + >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, + ... annotation_file=coco_annotation_file, + ... task='Panoptic') + >>> + >>> # 4) Read COCO data for Keypoint task + >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, + ... annotation_file=coco_annotation_file, + ... 
task='Keypoint') + >>> + >>> # In COCO dataset, each dictionary has keys "image" and "annotation" + + About COCO dataset: + + COCO(Microsoft Common Objects in Context) is a large-scale object detection, segmentation, and captioning dataset + with several features: Object segmentation, Recognition in context, Superpixel stuff segmentation, + 330K images (>200K labeled), 1.5 million object instances, 80 object categories, 91 stuff categories, + 5 captions per image, 250,000 people with keypoints. In contrast to the popular ImageNet dataset, COCO has fewer + categories but more instances in per category. + + You can unzip the original COCO-2017 dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── coco_dataset_directory + ├── train2017 + │ ├── 000000000009.jpg + │ ├── 000000000025.jpg + │ ├── ... + ├── test2017 + │ ├── 000000000001.jpg + │ ├── 000000058136.jpg + │ ├── ... + ├── val2017 + │ ├── 000000000139.jpg + │ ├── 000000057027.jpg + │ ├── ... + └── annotations + ├── captions_train2017.json + ├── captions_val2017.json + ├── instances_train2017.json + ├── instances_val2017.json + ├── person_keypoints_train2017.json + └── person_keypoints_val2017.json + + Citation: + + .. code-block:: + + @article{DBLP:journals/corr/LinMBHPRDZ14, + author = {Tsung{-}Yi Lin and Michael Maire and Serge J. Belongie and + Lubomir D. Bourdev and Ross B. Girshick and James Hays and + Pietro Perona and Deva Ramanan and Piotr Doll{\'{a}}r and C. Lawrence Zitnick}, + title = {Microsoft {COCO:} Common Objects in Context}, + journal = {CoRR}, + volume = {abs/1405.0312}, + year = {2014}, + url = {http://arxiv.org/abs/1405.0312}, + archivePrefix = {arXiv}, + eprint = {1405.0312}, + timestamp = {Mon, 13 Aug 2018 16:48:13 +0200}, + biburl = {https://dblp.org/rec/journals/corr/LinMBHPRDZ14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } + """ + + @check_cocodataset + def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None, + shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None, cache=None, + extra_metadata=False): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.annotation_file = annotation_file + self.task = replace_none(task, "Detection") + self.decode = replace_none(decode, False) + self.extra_metadata = extra_metadata + + def parse(self, children=None): + return cde.CocoNode(self.dataset_dir, self.annotation_file, self.task, self.decode, self.sampler, + self.extra_metadata) + + def get_class_indexing(self): + """ + Get the class index. + + Returns: + dict, a str-to-list mapping from label name to index. + + Examples: + >>> coco_dataset_dir = "/path/to/coco_dataset_directory/images" + >>> coco_annotation_file = "/path/to/coco_dataset_directory/annotation_file" + >>> + >>> # Read COCO data for Detection task + >>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir, + ... annotation_file=coco_annotation_file, + ... 
task='Detection')
+            >>>
+            >>> class_indexing = dataset.get_class_indexing()
+        """
+        if self.task not in {"Detection", "Panoptic"}:
+            raise NotImplementedError("Only 'Detection' and 'Panoptic' support get_class_indexing.")
+        if self._class_indexing is None:
+            runtime_getter = self._init_tree_getters()
+            self._class_indexing = dict(runtime_getter[0].GetClassIndexing())
+        return self._class_indexing
+
+
+class DIV2KDataset(MappableDataset):
+    """
+    A source dataset for reading and parsing the DIV2K dataset.
+
+    The generated dataset has two columns :py:obj:`[hr_image, lr_image]`.
+    The tensor of column :py:obj:`hr_image` is of the uint8 type.
+    The tensor of column :py:obj:`lr_image` is of the uint8 type.
+
+    Args:
+        dataset_dir (str): Path to the root directory that contains the dataset.
+        usage (str): Acceptable usages include `train`, `valid` or `all` (default= `train`).
+        downgrade (str): Acceptable downgrades include `bicubic`, `unknown`, `mild`, `difficult` or
+            `wild` (default= `bicubic`).
+        scale (int): Acceptable scales include 2, 3, 4 or 8 (default=2).
+            When `downgrade` is `bicubic`, scale can be 2, 3, 4, 8.
+            When `downgrade` is `unknown`, scale can only be 2, 3, 4.
+            When `downgrade` is `mild`, `difficult` or `wild`, scale can only be 4.
+        num_samples (int, optional): The number of images to be included in the dataset.
+            (default=None, all images).
+        num_parallel_workers (int, optional): Number of workers to read the data
+            (default=None, number set in the config).
+        shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
+            order behavior shown in the table).
+        decode (bool, optional): Decode the images after reading (default=False).
+        sampler (Sampler, optional): Object used to choose samples from the
+            dataset (default=None, expected order behavior shown in the table).
+        num_shards (int, optional): Number of shards that the dataset will be divided
+            into (default=None). When this argument is specified, `num_samples` reflects
+            the max sample number of per shard.
+        shard_id (int, optional): The shard ID within num_shards (default=None). This
+            argument can only be specified when num_shards is also specified.
+        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
+            (default=None, which means no cache is used).
+
+    Raises:
+        RuntimeError: If dataset_dir is invalid or does not contain data files.
+        RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+        RuntimeError: If sampler and shuffle are specified at the same time.
+        RuntimeError: If sampler and sharding are specified at the same time.
+        RuntimeError: If num_shards is specified but shard_id is None.
+        RuntimeError: If shard_id is specified but num_shards is None.
+        ValueError: If dataset_dir does not exist.
+        ValueError: If usage is invalid.
+        ValueError: If downgrade is invalid.
+        ValueError: If scale is invalid.
+        ValueError: If scale is 8 and downgrade is not `bicubic`.
+        ValueError: If downgrade is `mild`, `difficult` or `wild` and scale is not 4.
+        ValueError: If shard_id is invalid (< 0 or >= num_shards).
+
+    Note:
+        - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
+          The table below shows what input arguments are allowed and their expected behavior.
+
+    .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> div2k_dataset_dir = "/path/to/div2k_dataset_directory" + >>> + >>> # 1) Get all samples from DIV2K dataset in sequence + >>> dataset = ds.DIV2KDataset(dataset_dir=div2k_dataset_dir, usage="train", scale=2, downgrade="bicubic", + ... shuffle=False) + >>> + >>> # 2) Randomly select 350 samples from DIV2K dataset + >>> dataset = ds.DIV2KDataset(dataset_dir=div2k_dataset_dir, usage="train", scale=2, downgrade="bicubic", + ... num_samples=350, shuffle=True) + >>> + >>> # 3) Get samples from DIV2K dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.DIV2KDataset(dataset_dir=div2k_dataset_dir, usage="train", scale=2, downgrade="bicubic", + ... num_shards=2, shard_id=0) + >>> + >>> # In DIV2K dataset, each dictionary has keys "hr_image" and "lr_image" + + About DIV2K dataset: + + The DIV2K dataset consists of 1000 2K resolution images, among which 800 images are for training, 100 images + are for validation and 100 images are for testing. NTIRE 2017 and NTIRE 2018 include only training dataset + and validation dataset. + + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + Take the training set as an example. + + .. code-block:: + + . + └── DIV2K + ├── DIV2K_train_HR + | ├── 0001.png + | ├── 0002.png + | ├── ... + ├── DIV2K_train_LR_bicubic + | ├── X2 + | | ├── 0001x2.png + | | ├── 0002x2.png + | | ├── ... + | ├── X3 + | | ├── 0001x3.png + | | ├── 0002x3.png + | | ├── ... + | └── X4 + | ├── 0001x4.png + | ├── 0002x4.png + | ├── ... + ├── DIV2K_train_LR_unknown + | ├── X2 + | | ├── 0001x2.png + | | ├── 0002x2.png + | | ├── ... + | ├── X3 + | | ├── 0001x3.png + | | ├── 0002x3.png + | | ├── ... + | └── X4 + | ├── 0001x4.png + | ├── 0002x4.png + | ├── ... + ├── DIV2K_train_LR_mild + | ├── 0001x4m.png + | ├── 0002x4m.png + | ├── ... + ├── DIV2K_train_LR_difficult + | ├── 0001x4d.png + | ├── 0002x4d.png + | ├── ... + ├── DIV2K_train_LR_wild + | ├── 0001x4w.png + | ├── 0002x4w.png + | ├── ... + └── DIV2K_train_LR_x8 + ├── 0001x8.png + ├── 0002x8.png + ├── ... + Citation: + + .. 
code-block:: + + @InProceedings{Agustsson_2017_CVPR_Workshops, + author = {Agustsson, Eirikur and Timofte, Radu}, + title = {NTIRE 2017 Challenge on Single Image Super-Resolution: Dataset and Study}, + booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, + url = "http://www.vision.ee.ethz.ch/~timofter/publications/Agustsson-CVPRW-2017.pdf", + month = {July}, + year = {2017} + } + """ + + @check_div2k_dataset + def __init__(self, dataset_dir, usage="train", downgrade="bicubic", scale=2, num_samples=None, + num_parallel_workers=None, shuffle=None, decode=None, sampler=None, num_shards=None, + shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = usage + self.scale = scale + self.downgrade = downgrade + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.DIV2KNode(self.dataset_dir, self.usage, self.downgrade, self.scale, self.decode, self.sampler) + + +class EMnistDataset(MappableDataset): + """ + A source dataset for reading and parsing the EMNIST dataset. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + name (str): Name of splits for this dataset, can be "byclass", "bymerge", "balanced", "letters", "digits" + or "mnist". + usage (str, optional): Usage of this dataset, can be "train", "test" or "all". + (default=None, will read all samples). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> emnist_dataset_dir = "/path/to/emnist_dataset_directory" + >>> + >>> # Read 3 samples from EMNIST dataset + >>> dataset = ds.EMnistDataset(dataset_dir=emnist_dataset_dir, name="mnist", num_samples=3) + >>> + >>> # Note: In emnist_dataset dataset, each dictionary has keys "image" and "label" + + About EMNIST dataset: + + The EMNIST dataset is a set of handwritten character digits derived from the NIST Special + Database 19 and converted to a 28x28 pixel image format and dataset structure that directly + matches the MNIST dataset. Further information on the dataset contents and conversion process + can be found in the paper available at https://arxiv.org/abs/1702.05373v1. + + The numbers of characters and classes of each split of EMNIST are as follows: + + By Class: 814,255 characters and 62 unbalanced classes. + By Merge: 814,255 characters and 47 unbalanced classes. + Balanced: 131,600 characters and 47 balanced classes. + Letters: 145,600 characters and 26 balanced classes. + Digits: 280,000 characters and 10 balanced classes. + MNIST: 70,000 characters and 10 balanced classes. + + Here is the original EMNIST dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── mnist_dataset_dir + ├── emnist-mnist-train-images-idx3-ubyte + ├── emnist-mnist-train-labels-idx1-ubyte + ├── emnist-mnist-test-images-idx3-ubyte + ├── emnist-mnist-test-labels-idx1-ubyte + ├── ... + + Citation: + + .. code-block:: + + @article{cohen_afshar_tapson_schaik_2017, + title = {EMNIST: Extending MNIST to handwritten letters}, + DOI = {10.1109/ijcnn.2017.7966217}, + journal = {2017 International Joint Conference on Neural Networks (IJCNN)}, + author = {Cohen, Gregory and Afshar, Saeed and Tapson, Jonathan and Schaik, Andre Van}, + year = {2017}, + howpublished = {https://www.westernsydney.edu.au/icns/reproducible_research/ + publication_support_materials/emnist} + } + """ + + @check_emnist_dataset + def __init__(self, dataset_dir, name, usage=None, num_samples=None, num_parallel_workers=None, + shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.name = name + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.EMnistNode(self.dataset_dir, self.name, self.usage, self.sampler) + + +class FakeImageDataset(MappableDataset): + """ + A source dataset for generating fake images. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + num_images (int, optional): Number of images to generate in the dataset (default=1000). + image_size (tuple, optional): Size of the fake image (default=(224, 224, 3)). 
+ num_classes (int, optional): Number of classes in the dataset (default=10). + base_seed (int, optional): Offsets the index-based random seed used to generate each image (default=0). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> # Read 3 samples from FakeImage dataset + >>> dataset = ds.FakeImageDataset(num_images=1000, image_size=(224,224,3), + ... num_classes=10, base_seed=0, num_samples=3) + >>> + >>> # Note: In FakeImage dataset, each dictionary has keys "image" and "label" + """ + + @check_fake_image_dataset + def __init__(self, num_images=1000, image_size=(224, 224, 3), num_classes=10, base_seed=0, num_samples=None, + num_parallel_workers=None, shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.num_images = num_images + self.image_size = image_size + self.num_classes = num_classes + self.base_seed = base_seed + + def parse(self, children=None): + return cde.FakeImageNode(self.num_images, self.image_size, self.num_classes, self.base_seed, self.sampler) + + +class FashionMnistDataset(MappableDataset): + """ + A source dataset for reading and parsing the FASHION-MNIST dataset. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. 
+ The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all`. `train` will read from 60,000 + train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples. + (default=None, will read all samples) + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> fashion_mnist_dataset_dir = "/path/to/fashion_mnist_dataset_directory" + >>> + >>> # Read 3 samples from FASHIONMNIST dataset + >>> dataset = ds.FashionMnistDataset(dataset_dir=fashion_mnist_dataset_dir, num_samples=3) + >>> + >>> # Note: In FASHIONMNIST dataset, each dictionary has keys "image" and "label" + + About Fashion-MNIST dataset: + + Fashion-MNIST is a dataset of Zalando's article images—consisting of a training set of 60,000 examples and + a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. + We intend Fashion-MNIST to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking + machine learning algorithms. It shares the same image size and structure of training and testing splits. + + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . 
+ └── fashionmnist_dataset_dir + ├── t10k-images-idx3-ubyte + ├── t10k-labels-idx1-ubyte + ├── train-images-idx3-ubyte + └── train-labels-idx1-ubyte + + Citation: + + .. code-block:: + + @online{xiao2017/online, + author = {Han Xiao and Kashif Rasul and Roland Vollgraf}, + title = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms}, + date = {2017-08-28}, + year = {2017}, + eprintclass = {cs.LG}, + eprinttype = {arXiv}, + eprint = {cs.LG/1708.07747}, + } + """ + + @check_mnist_cifar_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.FashionMnistNode(self.dataset_dir, self.usage, self.sampler) + + +class FlickrDataset(MappableDataset): + """ + A source dataset for reading and parsing Flickr8k and Flickr30k dataset. + + The generated dataset has two columns :py:obj:`[image, annotation]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`annotation` is a tensor which contains 5 annotations string, + such as ["a", "b", "c", "d", "e"]. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + annotation_file (str): Path to the root directory that contains the annotation. + num_samples (int, optional): The number of images to be included in the dataset. + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + decode (bool, optional): Decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir is not valid or does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If dataset_dir is not exist. + ValueError: If annotation_file is not exist. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> flickr_dataset_dir = "/path/to/flickr_dataset_directory" + >>> annotation_file = "/path/to/flickr_annotation_file" + >>> + >>> # 1) Get all samples from FLICKR dataset in sequence + >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, + ... annotation_file=annotation_file, + ... shuffle=False) + >>> + >>> # 2) Randomly select 350 samples from FLICKR dataset + >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, + ... annotation_file=annotation_file, + ... num_samples=350, + ... shuffle=True) + >>> + >>> # 3) Get samples from FLICKR dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.FlickrDataset(dataset_dir=flickr_dataset_dir, + ... annotation_file=annotation_file, + ... num_shards=2, + ... shard_id=0) + >>> + >>> # In FLICKR dataset, each dictionary has keys "image" and "annotation" + + About Flickr8k dataset: + + The Flickr8k dataset consists of 8092 colour images. There are 40460 annotations in the Flickr8k.token.txt, + each image has 5 annotations. + + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── Flickr8k + ├── Flickr8k_Dataset + │ ├── 1000268201_693b08cb0e.jpg + │ ├── 1001773457_577c3a7d70.jpg + │ ├── ... + └── Flickr8k.token.txt + + Citation: + + .. code-block:: + + @article{DBLP:journals/jair/HodoshYH13, + author = {Micah Hodosh and Peter Young and Julia Hockenmaier}, + title = {Framing Image Description as a Ranking Task: Data, Models and Evaluation Metrics}, + journal = {J. Artif. Intell. Res.}, + volume = {47}, + pages = {853--899}, + year = {2013}, + url = {https://doi.org/10.1613/jair.3994}, + doi = {10.1613/jair.3994}, + timestamp = {Mon, 21 Jan 2019 15:01:17 +0100}, + biburl = {https://dblp.org/rec/journals/jair/HodoshYH13.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } + + About Flickr30k dataset: + + The Flickr30k dataset consists of 31783 colour images. There are 158915 annotations in + the results_20130124.token, each image has 5 annotations. + + You can unzip the dataset files into the following directory structure and read by MindSpore's API. + + Citation: + + .. code-block:: + + . + └── Flickr30k + ├── flickr30k-images + │ ├── 1000092795.jpg + │ ├── 10002456.jpg + │ ├── ... + └── results_20130124.token + + .. code-block:: + + @article{DBLP:journals/tacl/YoungLHH14, + author = {Peter Young and Alice Lai and Micah Hodosh and Julia Hockenmaier}, + title = {From image descriptions to visual denotations: New similarity metrics + for semantic inference over event descriptions}, + journal = {Trans. Assoc. Comput. 
Linguistics}, + volume = {2}, + pages = {67--78}, + year = {2014}, + url = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/229}, + timestamp = {Wed, 17 Feb 2021 21:55:25 +0100}, + biburl = {https://dblp.org/rec/journals/tacl/YoungLHH14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } + """ + + @check_flickr_dataset + def __init__(self, dataset_dir, annotation_file, num_samples=None, num_parallel_workers=None, shuffle=None, + decode=None, sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.annotation_file = annotation_file + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.FlickrNode(self.dataset_dir, self.annotation_file, self.decode, self.sampler) + + +class _Flowers102Dataset: + """ + Mainly for loading Flowers102 Dataset, and return one row each time. + """ + + def __init__(self, dataset_dir, task, usage, decode): + self.dataset_dir = os.path.realpath(dataset_dir) + self.task = task + self.usage = usage + self.decode = decode + + if self.task == "Classification": + self.column_names = ["image", "label"] + else: + self.column_names = ["image", "segmentation", "label"] + + labels_path = os.path.join(self.dataset_dir, "imagelabels.mat") + setid_path = os.path.join(self.dataset_dir, "setid.mat") + # minus one to transform 1~102 to 0 ~ 101 + self.labels = (loadmat(labels_path)["labels"][0] - 1).astype(np.uint32) + self.setid = loadmat(setid_path) + + if self.usage == 'train': + self.indices = self.setid["trnid"][0].tolist() + elif self.usage == 'test': + self.indices = self.setid["tstid"][0].tolist() + elif self.usage == 'valid': + self.indices = self.setid["valid"][0].tolist() + elif self.usage == 'all': + self.indices = self.setid["trnid"][0].tolist() + self.indices += self.setid["tstid"][0].tolist() + self.indices += self.setid["valid"][0].tolist() + else: + raise ValueError("Input usage is not within the valid set of ['train', 'valid', 'test', 'all'].") + + def __getitem__(self, index): + # range: 1 ~ 8189 + image_path = os.path.join(self.dataset_dir, "jpg", "image_" + str(self.indices[index]).zfill(5) + ".jpg") + if not os.path.exists(image_path): + raise RuntimeError("Can not find image file: " + image_path) + + if self.decode is True: + image = np.asarray(Image.open(image_path).convert("RGB")) + else: + image = np.fromfile(image_path, dtype=np.uint8) + + label = self.labels[self.indices[index] - 1] + + if self.task == "Segmentation": + segmentation_path = \ + os.path.join(self.dataset_dir, "segmim", "segmim_" + str(self.indices[index]).zfill(5) + ".jpg") + if not os.path.exists(segmentation_path): + raise RuntimeError("Can not find segmentation file: " + segmentation_path) + if self.decode is True: + segmentation = np.asarray(Image.open(segmentation_path).convert("RGB")) + else: + segmentation = np.fromfile(segmentation_path, dtype=np.uint8) + return image, segmentation, label + + return image, label + + def __len__(self): + return len(self.indices) + + +class Flowers102Dataset(GeneratorDataset): + """ + A source dataset for reading and parsing Flowers102 dataset. + + The generated dataset has two columns :py:obj:`[image, label]` or three :py:obj:`[image, segmentation, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. 
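# A minimal sketch of the random-access source pattern used by _Flowers102Dataset above:
# any object exposing __getitem__ and __len__ can be handed to GeneratorDataset, which maps
# the returned tuple onto the declared column names. The class and column names below are
# hypothetical, chosen only for illustration.
import numpy as np
import mindspore.dataset as ds

class _ToyRandomAccessSource:
    """Returns one (data, label) row per index, mirroring the helper's protocol."""

    def __init__(self, num_rows=8):
        self._data = np.random.randint(0, 255, (num_rows, 2, 2)).astype(np.uint8)
        self._label = np.arange(num_rows, dtype=np.uint32)

    def __getitem__(self, index):
        return self._data[index], self._label[index]

    def __len__(self):
        return len(self._data)

# GeneratorDataset wraps the source and exposes the named columns to the rest of the pipeline.
toy_dataset = ds.GeneratorDataset(_ToyRandomAccessSource(), column_names=["data", "label"], shuffle=False)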
+ The tensor of column :py:obj:`segmentation` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar or a tensor of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + task (str): Specify the 'Classification' or 'Segmentation' task (default='Classification'). + usage (str): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset + (default='all', will read all samples). + num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). + num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. + (default=None, expected order behavior shown in the table). + decode (bool, optional): Whether or not to decode the images and segmentations after reading (default=False). + sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible + input is required (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + Random accessible input is required. When this argument is specified, 'num_samples' reflects the max + sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only + when num_shards is also specified. Random accessible input is required. + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> flowers102_dataset_dir = "/path/to/flowers102_dataset_directory" + >>> dataset = ds.Flowers102Dataset(dataset_dir=flowers102_dataset_dir, + ... task="Classification", + ... usage="all", + ... decode=True) + + About Flowers102 dataset: + + Flowers102 dataset consists of 102 flower categories. + The flowers commonly occur in the United Kingdom. + Each class consists of between 40 and 258 images. + + Here is the original Flowers102 dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + . + └── flowes102_dataset_dir + ├── imagelabels.mat + ├── setid.mat + ├── jpg + ├── image_00001.jpg + ├── image_00002.jpg + ├── ... + ├── segmim + ├── segmim_00001.jpg + ├── segmim_00002.jpg + ├── ... + + Citation: + + .. 
code-block:: + + @InProceedings{Nilsback08, + author = "Maria-Elena Nilsback and Andrew Zisserman", + title = "Automated Flower Classification over a Large Number of Classes", + booktitle = "Indian Conference on Computer Vision, Graphics and Image Processing", + month = "Dec", + year = "2008", + } + """ + + @check_flowers102dataset + def __init__(self, dataset_dir, task="Classification", usage="all", num_samples=None, num_parallel_workers=1, + shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): + self.dataset_dir = os.path.realpath(dataset_dir) + self.task = replace_none(task, "Classification") + self.usage = replace_none(usage, "all") + self.decode = replace_none(decode, False) + dataset = _Flowers102Dataset(self.dataset_dir, self.task, self.usage, self.decode) + super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, + num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, + num_shards=num_shards, shard_id=shard_id) + + def get_class_indexing(self): + """ + Get the class index. + + Returns: + dict, a str-to-int mapping from label name to index. + """ + class_names = [ + "pink primrose", "hard-leaved pocket orchid", "canterbury bells", + "sweet pea", "english marigold", "tiger lily", "moon orchid", + "bird of paradise", "monkshood", "globe thistle", "snapdragon", + "colt's foot", "king protea", "spear thistle", "yellow iris", + "globe-flower", "purple coneflower", "peruvian lily", "balloon flower", + "giant white arum lily", "fire lily", "pincushion flower", "fritillary", + "red ginger", "grape hyacinth", "corn poppy", "prince of wales feathers", + "stemless gentian", "artichoke", "sweet william", "carnation", + "garden phlox", "love in the mist", "mexican aster", "alpine sea holly", + "ruby-lipped cattleya", "cape flower", "great masterwort", "siam tulip", + "lenten rose", "barbeton daisy", "daffodil", "sword lily", "poinsettia", + "bolero deep blue", "wallflower", "marigold", "buttercup", "oxeye daisy", + "common dandelion", "petunia", "wild pansy", "primula", "sunflower", + "pelargonium", "bishop of llandaff", "gaura", "geranium", "orange dahlia", + "pink-yellow dahlia?", "cautleya spicata", "japanese anemone", + "black-eyed susan", "silverbush", "californian poppy", "osteospermum", + "spring crocus", "bearded iris", "windflower", "tree poppy", "gazania", + "azalea", "water lily", "rose", "thorn apple", "morning glory", + "passion flower", "lotus", "toad lily", "anthurium", "frangipani", + "clematis", "hibiscus", "columbine", "desert-rose", "tree mallow", + "magnolia", "cyclamen", "watercress", "canna lily", "hippeastrum", + "bee balm", "ball moss", "foxglove", "bougainvillea", "camellia", "mallow", + "mexican petunia", "bromelia", "blanket flower", "trumpet creeper", + "blackberry lily" + ] + + class_dict = {} + for i, class_name in enumerate(class_names): + class_dict[class_name] = i + + return class_dict + + +class ImageFolderDataset(MappableDataset): + """ + A source dataset that reads images from a tree of directories. + All images within one folder have the same label. + + The generated dataset has two columns: :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is of a scalar of uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). 
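# A small usage sketch for the get_class_indexing() helper defined above; the directory path
# is a placeholder. The mapping is built from the hard-coded class-name list, so the first
# entry, "pink primrose", maps to index 0.
import mindspore.dataset as ds

flowers102_dir = "/path/to/flowers102_dataset_directory"  # placeholder path
dataset = ds.Flowers102Dataset(dataset_dir=flowers102_dir, task="Classification", usage="all")
class_indexing = dataset.get_class_indexing()
print(class_indexing["pink primrose"])  # expected: 0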
+ num_parallel_workers (int, optional): Number of workers to read the data + (default=None, set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + extensions (list[str], optional): List of file extensions to be + included in the dataset (default=None). + class_indexing (dict, optional): A str-to-int mapping from folder name to index + (default=None, the folder names will be sorted + alphabetically and each class will be given a + unique index starting from 0). + decode (bool, optional): Decode the images after reading (default=False). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + RuntimeError: If class_indexing is not a dictionary. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise. + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory" + >>> + >>> # 1) Read all samples (image files) in image_folder_dataset_dir with 8 threads + >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir, + ... num_parallel_workers=8) + >>> + >>> # 2) Read all samples (image files) from folder cat and folder dog with label 0 and 1 + >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir, + ... class_indexing={"cat":0, "dog":1}) + >>> + >>> # 3) Read all samples (image files) in image_folder_dataset_dir with extensions .JPEG and .png (case sensitive) + >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir, + ... extensions=[".JPEG", ".png"]) + + About ImageFolderDataset: + + You can construct the following directory structure from your dataset files and read by MindSpore's API. + + .. code-block:: + + . 
+ └── image_folder_dataset_directory + ├── class1 + │ ├── 000000000001.jpg + │ ├── 000000000002.jpg + │ ├── ... + ├── class2 + │ ├── 000000000001.jpg + │ ├── 000000000002.jpg + │ ├── ... + ├── class3 + │ ├── 000000000001.jpg + │ ├── 000000000002.jpg + │ ├── ... + ├── classN + ├── ... + """ + + @check_imagefolderdataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, + extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.extensions = replace_none(extensions, []) + self.class_indexing = replace_none(class_indexing, {}) + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing) + + +class KMnistDataset(MappableDataset): + """ + A source dataset for reading and parsing the KMNIST dataset. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 60,000 + train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples. + (default=None, will read all samples) + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If `dataset_dir` does not contain data files. + RuntimeError: If `num_parallel_workers` exceeds the max thread numbers. + RuntimeError: If `sampler` and `shuffle` are specified at the same time. + RuntimeError: If `sampler` and sharding are specified at the same time. + RuntimeError: If `num_shards` is specified but `shard_id` is None. + RuntimeError: If `shard_id` is specified but `num_shards` is None. + ValueError: If `shard_id` is invalid (out of range [0, `num_shards`]). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> kmnist_dataset_dir = "/path/to/kmnist_dataset_directory" + >>> + >>> # Read 3 samples from KMNIST dataset + >>> dataset = ds.KMnistDataset(dataset_dir=kmnist_dataset_dir, num_samples=3) + >>> + >>> # Note: In kmnist_dataset dataset, each dictionary has keys "image" and "label" + + About KMNIST dataset: + + KMNIST is a dataset, adapted from Kuzushiji Dataset, as a drop-in replacement for MNIST dataset, + which is the most famous dataset in the machine learning community. + + Here is the original KMNIST dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── kmnist_dataset_dir + ├── t10k-images-idx3-ubyte + ├── t10k-labels-idx1-ubyte + ├── train-images-idx3-ubyte + └── train-labels-idx1-ubyte + + Citation: + + .. code-block:: + + @online{clanuwat2018deep, + author = {Tarin Clanuwat and Mikel Bober-Irizar and Asanobu Kitamoto and + Alex Lamb and Kazuaki Yamamoto and David Ha}, + title = {Deep Learning for Classical Japanese Literature}, + date = {2018-12-03}, + year = {2018}, + eprintclass = {cs.CV}, + eprinttype = {arXiv}, + eprint = {cs.CV/1812.01718}, + } + """ + + @check_mnist_cifar_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.KMnistNode(self.dataset_dir, self.usage, self.sampler) + + +class ManifestDataset(MappableDataset): + """ + A source dataset for reading images from a Manifest file. + + The generated dataset has two columns: :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is of a scalar of uint64 type. + + Args: + dataset_file (str): File to be read. + usage (str, optional): Acceptable usages include `train`, `eval` and `inference` (default= `train`). + num_samples (int, optional): The number of images to be included in the dataset. + (default=None, will include all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + class_indexing (dict, optional): A str-to-int mapping from label name to index + (default=None, the folder names will be sorted alphabetically and each + class will be given a unique index starting from 0). + decode (bool, optional): decode the images after reading (default=False). 
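# A minimal sketch of the `sampler`/`shuffle` rule that the order-behavior tables above
# describe: supply either a sampler or `shuffle`, never both. The directory path is a
# placeholder; KMnistDataset is used here only as a representative mappable dataset.
import mindspore.dataset as ds

kmnist_dir = "/path/to/kmnist_dataset_directory"  # placeholder path
# Two equivalent ways to draw 3 random samples:
dataset_a = ds.KMnistDataset(dataset_dir=kmnist_dir, num_samples=3, shuffle=True)
dataset_b = ds.KMnistDataset(dataset_dir=kmnist_dir, sampler=ds.RandomSampler(num_samples=3))
# Passing both `sampler` and `shuffle` raises a RuntimeError, as listed under "Raises".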
+ num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the max number of samples per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_files are not valid or do not exist. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + RuntimeError: If class_indexing is not a dictionary. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise. + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> manifest_dataset_dir = "/path/to/manifest_dataset_file" + >>> + >>> # 1) Read all samples specified in manifest_dataset_dir dataset with 8 threads for training + >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, usage="train", num_parallel_workers=8) + >>> + >>> # 2) Read samples (specified in manifest_file.manifest) for shard 0 in a 2-way distributed training setup + >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, num_shards=2, shard_id=0) + """ + + @check_manifestdataset + def __init__(self, dataset_file, usage="train", num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_file = dataset_file + self.decode = replace_none(decode, False) + self.usage = replace_none(usage, "train") + self.class_indexing = replace_none(class_indexing, {}) + + def parse(self, children=None): + return cde.ManifestNode(self.dataset_file, self.usage, self.sampler, self.class_indexing, self.decode) + + def get_class_indexing(self): + """ + Get the class index. + + Returns: + dict, a str-to-int mapping from label name to index. 
+ + Examples: + >>> manifest_dataset_dir = "/path/to/manifest_dataset_file" + >>> + >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir) + >>> class_indexing = dataset.get_class_indexing() + """ + if self.class_indexing is None or not self.class_indexing: + if self._class_indexing is None: + runtime_getter = self._init_tree_getters() + self._class_indexing = runtime_getter[0].GetClassIndexing() + self.class_indexing = {} + for pair in self._class_indexing: + self.class_indexing[pair[0]] = pair[1][0] + return self.class_indexing + + +class MnistDataset(MappableDataset): + """ + A source dataset for reading and parsing the MNIST dataset. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test` or `all` . `train` will read from 60,000 + train samples, `test` will read from 10,000 test samples, `all` will read from all 70,000 samples. + (default=None, will read all samples) + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> mnist_dataset_dir = "/path/to/mnist_dataset_directory" + >>> + >>> # Read 3 samples from MNIST dataset + >>> dataset = ds.MnistDataset(dataset_dir=mnist_dataset_dir, num_samples=3) + >>> + >>> # Note: In mnist_dataset dataset, each dictionary has keys "image" and "label" + + About MNIST dataset: + + The MNIST database of handwritten digits has a training set of 60,000 examples, + and a test set of 10,000 examples. It is a subset of a larger set available from + NIST. The digits have been size-normalized and centered in a fixed-size image. + + Here is the original MNIST dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── mnist_dataset_dir + ├── t10k-images-idx3-ubyte + ├── t10k-labels-idx1-ubyte + ├── train-images-idx3-ubyte + └── train-labels-idx1-ubyte + + Citation: + + .. code-block:: + + @article{lecun2010mnist, + title = {MNIST handwritten digit database}, + author = {LeCun, Yann and Cortes, Corinna and Burges, CJ}, + journal = {ATT Labs [Online]}, + volume = {2}, + year = {2010}, + howpublished = {http://yann.lecun.com/exdb/mnist} + } + """ + + @check_mnist_cifar_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.MnistNode(self.dataset_dir, self.usage, self.sampler) + + +class PhotoTourDataset(MappableDataset): + """ + A source dataset for reading and parsing the PhotoTour dataset. + + The generated dataset with different usage has different output columns. + If train, the generated dataset has one column :py:obj:`[image]`, + else three columns :py:obj:`[image1, image2, matches]`. + The tensor of column :py:obj:`image`, :py:obj:`image1` and :py:obj:`image2` is of the uint8 type. + The tensor of column :py:obj:`matches` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + name (str): Name of the dataset to load, + should be one of 'notredame', 'yosemite', 'liberty', 'notredame_harris', + 'yosemite_harris' or 'liberty_harris'. + usage (str, optional): Usage of the dataset, can be `train` or `test` (Default=None, will be set to 'train'). + When usage is `train`, number of samples for each `name` is + {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092, 'liberty_harris': 379587, + 'yosemite_harris': 450912, 'notredame_harris': 325295}. + When usage is `test`, will read 100,000 samples for testing. + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). 
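# A minimal sketch of what "each dictionary has keys 'image' and 'label'" means in practice
# when iterating one of these datasets; the directory path is a placeholder.
import mindspore.dataset as ds

mnist_dir = "/path/to/mnist_dataset_directory"  # placeholder path
dataset = ds.MnistDataset(dataset_dir=mnist_dir, num_samples=3)
for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(row["image"].shape, row["label"])  # e.g. (28, 28, 1) and a scalar uint32 label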
+ num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If dataset_dir is not exist. + ValueError: If usage is not in ["train", "test"]. + ValueError: If name is not in ["notredame", "yosemite", "liberty", + "notredame_harris", "yosemite_harris", "liberty_harris"]. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table + below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 64 64 1 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> # Read 3 samples from PhotoTour dataset. + >>> dataset = ds.PhotoTourDataset(dataset_dir="/path/to/photo_tour_dataset_directory", + ... name='liberty', usage='train', num_samples=3) + >>> + >>> # In PhotoTourDataset dataset, if usage is 'train', each dictionary has key "image", + >>> # else has keys "image1" "image2" and "matches". + + About PhotoTour dataset: + + The data is taken from Photo Tourism reconstructions from Trevi Fountain (Rome), Notre Dame (Paris) and Half + Dome (Yosemite). Each dataset consists of a series of corresponding patches, which are obtained by projecting + 3D points from Photo Tourism reconstructions back into the original images. + + The dataset consists of 1024 x 1024 bitmap (.bmp) images, each containing a 16 x 16 array of image patches. + Each patch is sampled as 64 x 64 grayscale, with a canonical scale and orientation. For details of how the scale + and orientation is established, please see the paper. An associated metadata file info.txt contains the match + information. Each row of info.txt corresponds to a separate patch, with the patches ordered from left to right and + top to bottom in each bitmap image. 
The first number on each row of info.txt is the 3D point ID from which that + patch was sampled -- patches with the same 3D point ID are projected from the same 3D point (into different images). + The second number in info.txt corresponds to the image from which the patch was sampled, and is not used at present. + + You can unzip the original PhotoTour dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + . + └── photo_tour_dataset_directory + ├── liberty/ + │ ├── info.txt // two columns: 3D_point_ID, unused + │ ├── m50_100000_100000_0.txt // seven columns: patch_ID1, 3D_point_ID1, unused1, + │ │ // patch_ID2, 3D_point_ID2, unused2, unused3 + │ ├── patches0000.bmp // 1024*1024 pixels, with 16 * 16 patches. + │ ├── patches0001.bmp + │ ├── ... + ├── yosemite/ + │ ├── ... + ├── notredame/ + │ ├── ... + ├── liberty_harris/ + │ ├── ... + ├── yosemite_harris/ + │ ├── ... + ├── notredame_harris/ + │ ├── ... + + Citation: + + .. code-block:: + + @INPROCEEDINGS{4269996, + author={Winder, Simon A. J. and Brown, Matthew}, + booktitle={2007 IEEE Conference on Computer Vision and Pattern Recognition}, + title={Learning Local Image Descriptors}, + year={2007}, + volume={}, + number={}, + pages={1-8}, + doi={10.1109/CVPR.2007.382971} + } + """ + + @check_photo_tour_dataset + def __init__(self, dataset_dir, name, usage=None, num_samples=None, num_parallel_workers=None, + shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.name = name + self.usage = replace_none(usage, "train") + + def parse(self, children=None): + return cde.PhotoTourNode(self.dataset_dir, self.name, self.usage, self.sampler) + + +class Places365Dataset(MappableDataset): + """ + A source dataset for reading and parsing the Places365 dataset. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train-standard`, `train-challenge` or `val` + (default=None, will be set to 'train-standard'). + small (bool, optional): Use 256 * 256 images (True) or high resolution images (False) (default=False). + decode (bool, optional): Decode the images after reading (default=True). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. 
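# A minimal sketch of the usage-dependent columns described above for PhotoTourDataset:
# 'train' yields a single "image" column, while 'test' yields "image1", "image2" and
# "matches". The directory path is a placeholder.
import mindspore.dataset as ds

photo_tour_dir = "/path/to/photo_tour_dataset_directory"  # placeholder path
train_set = ds.PhotoTourDataset(dataset_dir=photo_tour_dir, name="liberty", usage="train")
test_set = ds.PhotoTourDataset(dataset_dir=photo_tour_dir, name="liberty", usage="test")
print(train_set.get_col_names())  # expected: ['image']
print(test_set.get_col_names())   # expected: ['image1', 'image2', 'matches']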
+        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing
+            (default=None, which means no cache is used).
+
+    Raises:
+        RuntimeError: If dataset_dir does not contain data files.
+        RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+        RuntimeError: If sampler and shuffle are specified at the same time.
+        RuntimeError: If sampler and sharding are specified at the same time.
+        RuntimeError: If num_shards is specified but shard_id is None.
+        RuntimeError: If shard_id is specified but num_shards is None.
+        ValueError: If shard_id is invalid (< 0 or >= num_shards).
+        ValueError: If usage is not in ["train-standard", "train-challenge", "val"].
+
+    Note:
+        - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
+          The table below shows what input arguments are allowed and their expected behavior.
+
+        .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
+           :widths: 25 25 50
+           :header-rows: 1
+
+           * - Parameter `sampler`
+             - Parameter `shuffle`
+             - Expected Order Behavior
+           * - None
+             - None
+             - random order
+           * - None
+             - True
+             - random order
+           * - None
+             - False
+             - sequential order
+           * - Sampler object
+             - None
+             - order defined by sampler
+           * - Sampler object
+             - True
+             - not allowed
+           * - Sampler object
+             - False
+             - not allowed
+
+    Examples:
+        >>> place365_dataset_dir = "/path/to/place365_dataset_directory"
+        >>>
+        >>> # Read 3 samples from Places365 dataset
+        >>> dataset = ds.Places365Dataset(dataset_dir=place365_dataset_dir, usage='train-standard',
+        ...                               small=True, decode=True, num_samples=3)
+        >>>
+        >>> # In places365 dataset, each dictionary has keys "image" and "label".
+
+    About Places365 dataset:
+
+    Convolutional neural networks (CNNs) trained on the Places2 Database can be used for scene recognition as well as
+    generic deep scene features for visual recognition.
+
+    The author releases the data of Places365-Standard and the data of Places365-Challenge to the public.
+    Places365-Standard is the core set of Places2 Database, which has been used to train the Places365-CNNs. The author
+    will add other kinds of annotation on the Places365-Standard in the future. Places365-Challenge is the competition
+    set of Places2 Database, which has 6.2 million extra images compared to the Places365-Standard.
+    The Places365-Challenge will be used for the Places Challenge 2016.
+
+    You can unzip the original Places365 dataset files into this directory structure and read by MindSpore's API.
+
+    .. code-block::
+
+        .
+        ├── categories_places365.txt
+        ├── places365_train-standard.txt
+        ├── places365_train-challenge.txt
+        ├── val_large/
+        │    ├── Places365_val_00000001.jpg
+        │    ├── Places365_val_00000002.jpg
+        │    ├── Places365_val_00000003.jpg
+        │    ├── ...
+        ├── val_256/
+        │    ├── ...
+        ├── data_large_standard/
+        │    ├── ...
+        ├── data_256_standard/
+        │    ├── ...
+        ├── data_large_challenge/
+        │    ├── ...
+        └── data_256_challenge/
+             ├── ...
+
+    Citation:
+
+    .. code-block::
+
+        @article{zhou2017places,
+            title={Places: A 10 million Image Database for Scene Recognition},
+            author={Zhou, Bolei and Lapedriza, Agata and Khosla, Aditya and Oliva, Aude and Torralba, Antonio},
+            journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+            year={2017},
+            publisher={IEEE}
+        }
+    """
+
+    @check_places365_dataset
+    def __init__(self, dataset_dir, usage=None, small=True, decode=False, num_samples=None, num_parallel_workers=None,
+                 shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None):
+        super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
+                         shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
+
+        self.dataset_dir = os.path.abspath(dataset_dir)
+        self.usage = replace_none(usage, "train-standard")
+        self.small = small
+        self.decode = decode
+
+    def parse(self, children=None):
+        return cde.Places365Node(self.dataset_dir, self.usage, self.small, self.decode, self.sampler)
+
+
+class QMnistDataset(MappableDataset):
+    """
+    A source dataset for reading and parsing the QMNIST dataset.
+
+    The generated dataset has two columns :py:obj:`[image, label]`.
+    The tensor of column :py:obj:`image` is of the uint8 type.
+    The tensor of column :py:obj:`label` is a scalar of the uint32 type when `compat` is True,
+    otherwise it is a tensor of the uint32 type.
+
+    Args:
+        dataset_dir (str): Path to the root directory that contains the dataset.
+        usage (str, optional): Usage of this dataset, can be `train`, `test`, `test10k`, `test50k`, `nist`
+            or `all` (default=None, will read all samples).
+        compat (bool, optional): Whether the label for each example is the class number (compat=True) or the
+            full QMNIST information (compat=False) (default=True).
+        num_samples (int, optional): The number of images to be included in the dataset
+            (default=None, will read all images).
+        num_parallel_workers (int, optional): Number of workers to read the data
+            (default=None, will use value set in the config).
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset
+            (default=None, expected order behavior shown in the table).
+        sampler (Sampler, optional): Object used to choose samples from the
+            dataset (default=None, expected order behavior shown in the table).
+        num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
+            When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
+        shard_id (int, optional): The shard ID within `num_shards` (default=None). This
+            argument can only be specified when `num_shards` is also specified.
+        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing
+            (default=None, which means no cache is used).
+
+    Raises:
+        RuntimeError: If dataset_dir does not contain data files.
+        RuntimeError: If num_parallel_workers exceeds the max thread numbers.
+        RuntimeError: If sampler and shuffle are specified at the same time.
+        RuntimeError: If sampler and sharding are specified at the same time.
+        RuntimeError: If num_shards is specified but shard_id is None.
+        RuntimeError: If shard_id is specified but num_shards is None.
+        ValueError: If shard_id is invalid (< 0 or >= num_shards).
+
+    Note:
+        - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
+          The table below shows what input arguments are allowed and their expected behavior.
+
+        ..
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> qmnist_dataset_dir = "/path/to/qmnist_dataset_directory" + >>> + >>> # Read 3 samples from QMNIST train dataset + >>> dataset = ds.QMnistDataset(dataset_dir=qmnist_dataset_dir, num_samples=3) + >>> + >>> # Note: In QMNIST dataset, each dictionary has keys "image" and "label" + + About QMNIST dataset: + + The QMNIST dataset was generated from the original data found in the NIST Special Database 19 with the goal to + match the MNIST preprocessing as closely as possible. + Through an iterative process, researchers tried to generate an additional 50k images of MNIST-like data. + They started with a reconstruction process given in the paper and used the Hungarian algorithm to find the best + matches between the original MNIST samples and their reconstructed samples. + + Here is the original QMNIST dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── qmnist_dataset_dir + ├── qmnist-train-images-idx3-ubyte + ├── qmnist-train-labels-idx2-int + ├── qmnist-test-images-idx3-ubyte + ├── qmnist-test-labels-idx2-int + ├── xnist-images-idx3-ubyte + └── xnist-labels-idx2-int + + Citation: + + .. code-block:: + + @incollection{qmnist-2019, + title = "Cold Case: The Lost MNIST Digits", + author = "Chhavi Yadav and L\'{e}on Bottou",\ + booktitle = {Advances in Neural Information Processing Systems 32}, + year = {2019}, + publisher = {Curran Associates, Inc.}, + } + """ + + @check_qmnist_dataset + def __init__(self, dataset_dir, usage=None, compat=True, num_samples=None, num_parallel_workers=None, + shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + self.compat = compat + + def parse(self, children=None): + return cde.QMnistNode(self.dataset_dir, self.usage, self.compat, self.sampler) + + +class RandomDataset(SourceDataset): + """ + A source dataset that generates random data. + + Args: + total_rows (int, optional): Number of samples for the dataset to generate + (default=None, number of samples is random). + schema (Union[str, Schema], optional): Path to the JSON schema file or schema object (default=None). + If the schema is not provided, the random dataset generates a random schema. + columns_list (list[str], optional): List of columns to be read (default=None, read all columns) + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, all samples). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). 
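# RandomDataset carries no Examples block of its own, so here is a minimal sketch of driving
# it with an explicit schema; the column names, types and shapes are arbitrary choices made
# for illustration.
import mindspore.dataset as ds
from mindspore import dtype as mstype

schema = ds.Schema()
schema.add_column(name="image", de_type=mstype.uint8, shape=[2, 2])
schema.add_column(name="label", de_type=mstype.uint32, shape=[1])
dataset = ds.RandomDataset(schema=schema, total_rows=4)  # 4 rows of random data matching the schema
for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(row["image"].shape, row["label"])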
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, 'num_samples' reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + """ + + @check_random_dataset + def __init__(self, total_rows=None, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None, + cache=None, shuffle=None, num_shards=None, shard_id=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + self.total_rows = total_rows + if schema is not None: + self.total_rows = replace_none(total_rows, Schema.get_num_rows(schema)) + self.schema = schema + self.columns_list = replace_none(columns_list, []) + + def parse(self, children=None): + schema = self.schema.cpp_schema if isinstance(self.schema, Schema) else self.schema + return cde.RandomNode(self.total_rows, schema, self.columns_list) + + +class _SBDataset: + """ + Dealing with the data file with .mat extension, and return one row in tuple (image, task) each time. + """ + + def __init__(self, dataset_dir, task, usage, decode): + self.column_list = ['image', 'task'] + self.task = task + self.images_path = os.path.join(dataset_dir, 'img') + self.cls_path = os.path.join(dataset_dir, 'cls') + self._loadmat = loadmat + self.categories = 20 + self.decode = replace_none(decode, False) + + if usage == "all": + image_names = [] + for item in ["train", "val"]: + usage_path = os.path.join(dataset_dir, item + '.txt') + if not os.path.exists(usage_path): + raise FileNotFoundError("SBDataset: {0} not found".format(usage_path)) + with open(usage_path, 'r') as f: + image_names += [x.strip() for x in f.readlines()] + else: + usage_path = os.path.join(dataset_dir, usage + '.txt') + if not os.path.exists(usage_path): + raise FileNotFoundError("SBDataset: {0} not found".format(usage_path)) + with open(usage_path, 'r') as f: + image_names = [x.strip() for x in f.readlines()] + + self.images = [os.path.join(self.images_path, i + ".jpg") for i in image_names] + self.clss = [os.path.join(self.cls_path, i + ".mat") for i in image_names] + + if len(self.images) != len(self.clss): + raise ValueError("SBDataset: images count not equal to cls count") + + self._get_data = self._get_boundaries_data if self.task == "Boundaries" else self._get_segmentation_data + self._get_item = self._get_decode_item if self.decode else self._get_undecode_item + + def _get_boundaries_data(self, mat_path): + mat_data = self._loadmat(mat_path) + return np.concatenate([np.expand_dims(mat_data['GTcls'][0][self.task][0][i][0].toarray(), axis=0) + for i in range(self.categories)], axis=0) + + def _get_segmentation_data(self, mat_path): + mat_data = self._loadmat(mat_path) + return Image.fromarray(mat_data['GTcls'][0][self.task][0]) + + def _get_decode_item(self, idx): + return Image.open(self.images[idx]).convert('RGB'), self._get_data(self.clss[idx]) + + def _get_undecode_item(self, idx): + return np.fromfile(self.images[idx], dtype=np.uint8), self._get_data(self.clss[idx]) + + def __len__(self): + return len(self.images) + + def __getitem__(self, idx): + return self._get_item(idx) + + +class SBDataset(GeneratorDataset): + """ + A source 
dataset for reading and parsing Semantic Boundaries Dataset. + + The generated dataset has two columns: :py:obj:`[image, task]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`task` contains 20 images of the uint8 type if `task` is `Boundaries` otherwise + contains 1 image of the uint8 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + task (str, optional): Acceptable tasks include `Boundaries` or `Segmentation` (default= `Boundaries`). + usage (str, optional): Acceptable usages include `train`, `val`, `train_noval` and `all` (default= `all`). + num_samples (int, optional): The number of images to be included in the dataset. + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + + Raises: + RuntimeError: If dataset_dir is not valid or does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If dataset_dir is not exist. + ValueError: If task is not in [`Boundaries`, `Segmentation`]. + ValueError: If usage is not in [`train`, `val`, `train_noval`, `all`]. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
+           :widths: 25 25 50
+           :header-rows: 1
+
+           * - Parameter `sampler`
+             - Parameter `shuffle`
+             - Expected Order Behavior
+           * - None
+             - None
+             - random order
+           * - None
+             - True
+             - random order
+           * - None
+             - False
+             - sequential order
+           * - Sampler object
+             - None
+             - order defined by sampler
+           * - Sampler object
+             - True
+             - not allowed
+           * - Sampler object
+             - False
+             - not allowed
+
+    Examples:
+        >>> sb_dataset_dir = "/path/to/sb_dataset_directory"
+        >>>
+        >>> # 1) Get all samples from Semantic Boundaries Dataset in sequence
+        >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, shuffle=False)
+        >>>
+        >>> # 2) Randomly select 350 samples from Semantic Boundaries Dataset
+        >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, num_samples=350, shuffle=True)
+        >>>
+        >>> # 3) Get samples from Semantic Boundaries Dataset for shard 0 in a 2-way distributed training
+        >>> dataset = ds.SBDataset(dataset_dir=sb_dataset_dir, num_shards=2, shard_id=0)
+        >>>
+        >>> # In Semantic Boundaries Dataset, each dictionary has keys "image" and "task"
+
+    About Semantic Boundaries Dataset:
+
+    The Semantic Boundaries Dataset consists of 11355 colour images. There are 8498 image names in train.txt,
+    2857 image names in val.txt and 5623 image names in train_noval.txt. The category cls/
+    contains the Segmentation and Boundaries results at category level, while the category inst/ contains the
+    Segmentation and Boundaries results at instance level.
+
+    You can unzip the dataset files into the following structure and read by MindSpore's API:
+
+    .. code-block::
+
+        .
+        └── benchmark_RELEASE
+            ├── dataset
+            ├── img
+            │    ├── 2008_000002.jpg
+            │    ├── 2008_000003.jpg
+            │    ├── ...
+            ├── cls
+            │    ├── 2008_000002.mat
+            │    ├── 2008_000003.mat
+            │    ├── ...
+            ├── inst
+            │    ├── 2008_000002.mat
+            │    ├── 2008_000003.mat
+            │    ├── ...
+            ├── train.txt
+            └── val.txt
+
+    Citation:
+
+    .. code-block::
+
+        @InProceedings{BharathICCV2011,
+            author = "Bharath Hariharan and Pablo Arbelaez and Lubomir Bourdev and
+                      Subhransu Maji and Jitendra Malik",
+            title = "Semantic Contours from Inverse Detectors",
+            booktitle = "International Conference on Computer Vision (ICCV)",
+            year = "2011",
+        }
+    """
+
+    @check_sb_dataset
+    def __init__(self, dataset_dir, task='Boundaries', usage='all', num_samples=None, num_parallel_workers=1,
+                 shuffle=None, decode=None, sampler=None, num_shards=None, shard_id=None):
+        dataset = _SBDataset(dataset_dir, task, usage, decode)
+        super().__init__(dataset, column_names=dataset.column_list, num_samples=num_samples,
+                         num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler,
+                         num_shards=num_shards, shard_id=shard_id)
+
+
+class SBUDataset(MappableDataset):
+    """
+    A source dataset for reading and parsing the SBU dataset.
+
+    The generated dataset has two columns :py:obj:`[image, caption]`.
+    The tensor of column :py:obj:`image` is of the uint8 type.
+    The tensor of column :py:obj:`caption` is of the string type.
+
+    Args:
+        dataset_dir (str): Path to the root directory that contains the dataset.
+        decode (bool, optional): Decode the images after reading (default=False).
+        num_samples (int, optional): The number of images to be included in the dataset
+            (default=None, will read all images).
+        num_parallel_workers (int, optional): Number of workers to read the data
+            (default=None, will use value set in the config).
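# A minimal sketch of a typical follow-up pipeline when decode=False: the "image" column then
# holds raw encoded bytes, so decoding and resizing are done with map() before batching.
# The directory path is a placeholder; SBUDataset stands in for any of the vision datasets here.
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

sbu_dir = "/path/to/sbu_dataset_directory"  # placeholder path
dataset = ds.SBUDataset(dataset_dir=sbu_dir, decode=False)
dataset = dataset.map(operations=c_vision.Decode(), input_columns=["image"])
dataset = dataset.map(operations=c_vision.Resize((256, 256)), input_columns=["image"])
dataset = dataset.project(columns=["image"])  # keep only the image column so rows batch cleanly
dataset = dataset.batch(8)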
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> sbu_dataset_dir = "/path/to/sbu_dataset_directory" + >>> # Read 3 samples from SBU dataset + >>> dataset = ds.SBUDataset(dataset_dir=sbu_dataset_dir, num_samples=3) + + About SBU dataset: + + SBU dataset is a large captioned photo collection. + It contains one million images with associated visually relevant captions. + + You should manually download the images using official download.m by replacing 'urls{i}(24, end)' with + 'urls{i}(24:1:end)' and keep the directory as below. + + .. code-block:: + + . + └─ dataset_dir + ├── SBU_captioned_photo_dataset_captions.txt + ├── SBU_captioned_photo_dataset_urls.txt + └── sbu_images + ├── m_3326_3596303505_3ce4c20529.jpg + ├── ...... + └── m_2522_4182181099_c3c23ab1cc.jpg + + Citation: + + .. code-block:: + + @inproceedings{Ordonez:2011:im2text, + Author = {Vicente Ordonez and Girish Kulkarni and Tamara L. 
Berg}, + Title = {Im2Text: Describing Images Using 1 Million Captioned Photographs}, + Booktitle = {Neural Information Processing Systems ({NIPS})}, + Year = {2011}, + } + """ + + @check_sbu_dataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.SBUNode(self.dataset_dir, self.decode, self.sampler) + + +class SemeionDataset(MappableDataset): + """ + A source dataset for reading and parsing Semeion dataset. + + The generated dataset has two columns :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + num_samples (int, optional): The number of samples to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. 
list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> semeion_dataset_dir = "/path/to/semeion_dataset_directory" + >>> + >>> # 1) Get all samples from SEMEION dataset in sequence + >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, shuffle=False) + >>> + >>> # 2) Randomly select 10 samples from SEMEION dataset + >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, num_samples=10, shuffle=True) + >>> + >>> # 3) Get samples from SEMEION dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, num_shards=2, shard_id=0) + >>> + >>> # In SEMEION dataset, each dictionary has keys: image, label. + + About SEMEION dataset: + + The dataset was created by Tactile Srl, Brescia, Italy (http://www.tattile.it) and donated in 1994 + to Semeion Research Center of Sciences of Communication, Rome, Italy (http://www.semeion.it), + for machine learning research. + + This dataset consists of 1593 records (rows) and 256 attributes (columns). Each record represents + a handwritten digit, originally scanned in a 256-level grey scale. Each pixel of each + original scanned image was first stretched and then scaled between 0 and 1 + (every pixel whose grey value was 127 or below was set to 0, + and every pixel whose grey value was above 127 was set to 1). Finally, each binary image + was scaled again into a 16x16 square box (the final 256 binary attributes). + + .. code-block:: + + . + └── semeion_dataset_dir + ├── semeion.data + └── semeion.names + + Citation: + + .. code-block:: + + @article{ + title={The Theory of Independent Judges, in Substance Use & Misuse 33(2)1998, pp 439-461}, + author={M Buscema, MetaNet}, + } + """ + + @check_semeion_dataset + def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + + def parse(self, children=None): + return cde.SemeionNode(self.dataset_dir, self.sampler) + + +class STL10Dataset(MappableDataset): + """ + A source dataset for reading and parsing STL10 dataset. + + The generated dataset has two columns: :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the int32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be "train", "test", + "unlabeled", "train+unlabeled" or "all". "train" will read from 5,000 + train samples, "test" will read from 8,000 test samples, + "unlabeled" will read from all 100,000 samples, and "train+unlabeled" + will read from 105,000 samples, "all" will read all the samples + (default=None, all samples). + num_samples (int, optional): The number of images to be included in the dataset.
+ (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + sampler (Sampler, optional): Object used to choose samples from the + dataset (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, 'num_samples' reflects + the max sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir is not valid or does not exist or does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If usage is invalid. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> stl10_dataset_dir = "/path/to/stl10_dataset_directory" + >>> + >>> # 1) Get all samples from STL10 dataset in sequence + >>> dataset = ds.STL10Dataset(dataset_dir=stl10_dataset_dir, shuffle=False) + >>> + >>> # 2) Randomly select 350 samples from STL10 dataset + >>> dataset = ds.STL10Dataset(dataset_dir=stl10_dataset_dir, num_samples=350, shuffle=True) + >>> + >>> # 3) Get samples from STL10 dataset for shard 0 in a 2-way distributed training + >>> dataset = ds.STL10Dataset(dataset_dir=stl10_dataset_dir, num_shards=2, shard_id=0) + + About STL10 dataset: + + STL10 dataset consists of 10 classes: airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. + STL10 is inspired by the CIFAR-10 dataset. + Images are 96x96 pixels, color. + 500 training images, 800 test images per class and 100,000 unlabeled images. + Labels are 0-indexed, and unlabeled images have -1 as their labels. + + Here is the original STL10 dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── stl10_dataset_dir + ├── train_X.bin + ├── train_y.bin + ├── test_X.bin + ├── test_y.bin + └── unlabeled_X.bin + + Citation: + + .. 
code-block:: + + @techreport{Coates10, + author = {Adam Coates}, + title = {Learning multiple layers of features from tiny images}, + year = {2010}, + howpublished = {https://cs.stanford.edu/~acoates/stl10/}, + description = {The STL-10 dataset consists of 96x96 RGB images in 10 classes, + with 500 training images and 800 testing images per class. + There are 5000 training images and 8000 test images. + It also has 100000 unlabeled images for unsupervised learning. + These examples are extracted from a similar but broader distribution of images. + } + } + """ + + @check_stl10_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.STL10Node(self.dataset_dir, self.usage, self.sampler) + + +class _SVHNDataset: + """ + Mainly for loading the SVHN dataset; each access returns one (image, label) pair. + """ + + def __init__(self, dataset_dir, usage): + self.dataset_dir = os.path.realpath(dataset_dir) + self.usage = usage + self.column_names = ["image", "label"] + self.usage_all = ["train", "test", "extra"] + self.data = np.array([], dtype=np.uint8) + self.labels = np.array([], dtype=np.uint32) + + if self.usage == "all": + for _usage in self.usage_all: + data, label = self._load_mat(_usage) + self.data = np.concatenate((self.data, data)) if self.data.size else data + self.labels = np.concatenate((self.labels, label)) if self.labels.size else label + else: + self.data, self.labels = self._load_mat(self.usage) + + def _load_mat(self, mode): + filename = mode + "_32x32.mat" + mat_data = loadmat(os.path.join(self.dataset_dir, filename)) + data = np.transpose(mat_data['X'], [3, 0, 1, 2]) + label = mat_data['y'].astype(np.uint32).squeeze() + np.place(label, label == 10, 0) + return data, label + + def __getitem__(self, index): + return self.data[index], self.labels[index] + + def __len__(self): + return len(self.data) + + +class SVHNDataset(GeneratorDataset): + """ + A source dataset for reading and parsing SVHN dataset. + + The generated dataset has two columns: :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of the dataset + (default=None, will read all samples). + num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images). + num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required. + (default=None, expected order behavior shown in the table). + sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible + input is required (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + Random accessible input is required. 
When this argument is specified, 'num_samples' reflects the max + sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This argument must be specified only + when num_shards is also specified. Random accessible input is required. + + Raises: + RuntimeError: If dataset_dir is not valid or does not exist or does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If usage is invalid. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter 'sampler' + - Parameter 'shuffle' + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> svhn_dataset_dir = "/path/to/svhn_dataset_directory" + >>> dataset = ds.SVHNDataset(dataset_dir=svhn_dataset_dir, usage="train") + + About SVHN dataset: + + SVHN dataset consists of 10 digit classes. + SVHN is obtained from house numbers in Google Street View images. + 73257 digits for training, 26032 digits for testing, and 531131 additional extra training data. + + Here is the original SVHN dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + . + └── svhn_dataset_dir + ├── train_32x32.mat + ├── test_32x32.mat + └── extra_32x32.mat + + Citation: + + .. code-block:: + + @article{ + title={Reading Digits in Natural Images with Unsupervised Feature Learning}, + author={Yuval Netzer, Tao Wang, Adam Coates, Alessandro Bissacco, Bo Wu, Andrew Y. Ng}, + conference={NIPS Workshop on Deep Learning and Unsupervised Feature Learning 2011.}, + year={2011}, + publisher={NIPS} + url={http://ufldl.stanford.edu/housenumbers} + } + + """ + + @check_svhn_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=1, shuffle=None, + sampler=None, num_shards=None, shard_id=None): + self.dataset_dir = os.path.realpath(dataset_dir) + self.usage = replace_none(usage, "all") + dataset = _SVHNDataset(self.dataset_dir, self.usage) + + super().__init__(dataset, column_names=dataset.column_names, num_samples=num_samples, + num_parallel_workers=num_parallel_workers, shuffle=shuffle, sampler=sampler, + num_shards=num_shards, shard_id=shard_id) + + +class USPSDataset(SourceDataset): + """ + A source dataset for reading and parsing the USPS dataset. + + The generated dataset has two columns: :py:obj:`[image, label]`. + The tensor of column :py:obj:`image` is of the uint8 type. + The tensor of column :py:obj:`label` is of a scalar of uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be "train", "test" or "all". 
"train" will read from 7,291 + train samples, "test" will read from 2,007 test samples, "all" will read from all 9,298 samples. + (default=None, will read all samples) + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (Union[bool, Shuffle level], optional): Perform reshuffling of the data every epoch + (default=Shuffle.GLOBAL). + If shuffle is False, no shuffling will be performed; + If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL + Otherwise, there are two levels of shuffling: + + - Shuffle.GLOBAL: Shuffle both the files and samples. + + - Shuffle.FILES: Shuffle files only. + + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the max sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This + argument can only be specified when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir is not valid or does not exist or does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If usage is invalid. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Examples: + >>> usps_dataset_dir = "/path/to/usps_dataset_directory" + >>> + >>> # Read 3 samples from USPS dataset + >>> dataset = ds.USPSDataset(dataset_dir=usps_dataset_dir, num_samples=3) + >>> + >>> # Note: In USPS dataset, each dictionary has keys "image" and "label" + + About USPS dataset: + + USPS is a digit dataset automatically scanned from envelopes by the U.S. Postal Service + containing a total of 9,298 16×16 pixel grayscale samples. + The images are centered, normalized and show a broad range of font styles. + + Here is the original USPS dataset structure. + You can download and unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + . + └── usps_dataset_dir + ├── usps + ├── usps.t + + Citation: + + .. 
code-block:: + + @article{hull1994database, + title={A database for handwritten text recognition research}, + author={Hull, Jonathan J.}, + journal={IEEE Transactions on pattern analysis and machine intelligence}, + volume={16}, + number={5}, + pages={550--554}, + year={1994}, + publisher={IEEE} + } + """ + + @check_usps_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL, + num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle, + num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + + def parse(self, children=None): + return cde.USPSNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards, + self.shard_id) + + +class VOCDataset(MappableDataset): + """ + A source dataset for reading and parsing VOC dataset. + + The generated dataset with different task setting has different output columns: + + - task = :py:obj:`Detection`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \ + :py:obj:`[label, dtype=uint32]`, :py:obj:`[difficult, dtype=uint32]`, :py:obj:`[truncate, dtype=uint32]`. + - task = :py:obj:`Segmentation`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[target,dtype=uint8]`. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + task (str, optional): Set the task type of reading voc data, now only support `Segmentation` or `Detection` + (default= `Segmentation`). + usage (str, optional): Set the task type of ImageSets(default= `train`). If task is `Segmentation`, image and + annotation list will be loaded in ./ImageSets/Segmentation/usage + ".txt"; If task is `Detection`, image and + annotation list will be loaded in ./ImageSets/Main/usage + ".txt"; if task and usage are not set, image and + annotation list will be loaded in ./ImageSets/Segmentation/train.txt as default. + class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in + `Detection` task (default=None, the folder names will be sorted alphabetically and each + class will be given a unique index starting from 0). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, number set in the config). + shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected + order behavior shown in the table). + decode (bool, optional): Decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided + into (default=None). When this argument is specified, `num_samples` reflects + the maximum sample number of per shard. + shard_id (int, optional): The shard ID within num_shards (default=None). This + argument can only be specified when num_shards is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + extra_metadata(bool, optional): Flag to add extra meta-data to row. 
If True, an additional column named + :py:obj:`[_meta-filename, dtype=string]` will be output at the end (default=False). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If the XML in Annotations is in an invalid format. + RuntimeError: If the XML in Annotations is missing the `object` attribute. + RuntimeError: If the XML in Annotations is missing the `bndbox` attribute. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If task is not 'Segmentation' or 'Detection'. + ValueError: If task is 'Segmentation' but class_indexing is not None. + ValueError: If the txt file related to usage does not exist. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + + Note: + - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op + is added to remove the prefix ('_meta-'). + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> voc_dataset_dir = "/path/to/voc_dataset_directory" + >>> + >>> # 1) Read VOC data for segmentation training + >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Segmentation", usage="train") + >>> + >>> # 2) Read VOC data for detection training + >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train") + >>> + >>> # 3) Read all VOC dataset samples in voc_dataset_dir with 8 threads in random order + >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train", + ... num_parallel_workers=8) + >>> + >>> # 4) Read then decode all VOC dataset samples in voc_dataset_dir in sequence + >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train", + ... decode=True, shuffle=False) + >>> + >>> # In VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target" + >>> # In VOC dataset, if task='Detection', each dictionary has keys "image", "bbox", "label", "difficult" + >>> # and "truncate" + + About VOC dataset: + + The PASCAL Visual Object Classes (VOC) challenge is a benchmark in visual + object category recognition and detection, providing the vision and machine + learning communities with a standard dataset of images and annotation, and + standard evaluation procedures. + + You can unzip the original VOC-2012 dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── voc2012_dataset_dir + ├── Annotations + │ ├── 2007_000027.xml + │ ├── 2007_000032.xml + │ ├── ... + ├── ImageSets + │ ├── Action + │ ├── Layout + │ ├── Main + │ └── Segmentation + ├── JPEGImages + │ ├── 2007_000027.jpg + │ ├── 2007_000032.jpg + │ ├── ... + ├── SegmentationClass + │ ├── 2007_000032.png + │ ├── 2007_000033.png + │ ├── ... 
+ └── SegmentationObject + ├── 2007_000032.png + ├── 2007_000033.png + ├── ... + + Citation: + + .. code-block:: + + @article{Everingham10, + author = {Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.}, + title = {The Pascal Visual Object Classes (VOC) Challenge}, + journal = {International Journal of Computer Vision}, + volume = {88}, + year = {2010}, + number = {2}, + month = {jun}, + pages = {303--338}, + biburl = {http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.html#bibtex}, + howpublished = {http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html} + } + """ + + @check_vocdataset + def __init__(self, dataset_dir, task="Segmentation", usage="train", class_indexing=None, num_samples=None, + num_parallel_workers=None, shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None, + cache=None, extra_metadata=False): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + self.dataset_dir = dataset_dir + self.task = replace_none(task, "Segmentation") + self.usage = replace_none(usage, "train") + self.class_indexing = replace_none(class_indexing, {}) + self.decode = replace_none(decode, False) + self.extra_metadata = extra_metadata + + def parse(self, children=None): + return cde.VOCNode(self.dataset_dir, self.task, self.usage, self.class_indexing, self.decode, self.sampler, + self.extra_metadata) + + def get_class_indexing(self): + """ + Get the class index. + + Returns: + dict, a str-to-int mapping from label name to index. + + Examples: + >>> voc_dataset_dir = "/path/to/voc_dataset_directory" + >>> + >>> dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection") + >>> class_indexing = dataset.get_class_indexing() + """ + if self.task != "Detection": + raise NotImplementedError("Only 'Detection' task supports get_class_indexing.") + if self.class_indexing is None or not self.class_indexing: + if self._class_indexing is None: + runtime_getter = self._init_tree_getters() + self._class_indexing = runtime_getter[0].GetClassIndexing() + self.class_indexing = {} + for pair in self._class_indexing: + self.class_indexing[pair[0]] = pair[1][0] + return self.class_indexing + + +class WIDERFaceDataset(MappableDataset): + """ + A source dataset for reading and parsing the WIDERFace dataset. + + When usage is "train", "valid" or "all", the generated dataset has eight columns ["image", "bbox", "blur", + "expression", "illumination", "occlusion", "pose", "invalid"]. When usage is "test", it only has one column + ["image"]. + The tensor of column :py:obj:`image` is a vector of the uint8 type. + The tensor of column :py:obj:`bbox` is a scalar of the uint32 type. + The tensor of column :py:obj:`blur` is a scalar of the uint32 type. + The tensor of column :py:obj:`expression` is a scalar of the uint32 type. + The tensor of column :py:obj:`illumination` is a scalar of the uint32 type. + The tensor of column :py:obj:`occlusion` is a scalar of the uint32 type. + The tensor of column :py:obj:`pose` is a scalar of the uint32 type. + The tensor of column :py:obj:`invalid` is a scalar of the uint32 type. + + Args: + dataset_dir (str): Path to the root directory that contains the dataset. + usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`. 
`train` will read + from 12,880 samples, `test` will read from 16,097 samples, `valid` will read from 3,226 samples + and `all` will read all `train` and `valid` samples (default=None, will be set to `all`). + num_samples (int, optional): The number of images to be included in the dataset + (default=None, will read all images). + num_parallel_workers (int, optional): Number of workers to read the data + (default=None, will use value set in the config). + shuffle (bool, optional): Whether or not to perform shuffle on the dataset + (default=None, expected order behavior shown in the table). + decode (bool, optional): Decode the images after reading (default=False). + sampler (Sampler, optional): Object used to choose samples from the dataset + (default=None, expected order behavior shown in the table). + num_shards (int, optional): Number of shards that the dataset will be divided into (default=None). + When this argument is specified, `num_samples` reflects the maximum sample number of per shard. + shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only be specified + when `num_shards` is also specified. + cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing + (default=None, which means no cache is used). + + Raises: + RuntimeError: If dataset_dir does not contain data files. + RuntimeError: If num_parallel_workers exceeds the max thread numbers. + RuntimeError: If sampler and shuffle are specified at the same time. + RuntimeError: If sampler and sharding are specified at the same time. + RuntimeError: If num_shards is specified but shard_id is None. + RuntimeError: If shard_id is specified but num_shards is None. + ValueError: If shard_id is invalid (< 0 or >= num_shards). + ValueError: If usage is not in [`train`, `test`, `valid`, `all`]. + ValueError: If annotation_file does not exist. + ValueError: If dataset_dir does not exist. + + Note: + - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive. + The table below shows what input arguments are allowed and their expected behavior. + + .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle` + :widths: 25 25 50 + :header-rows: 1 + + * - Parameter `sampler` + - Parameter `shuffle` + - Expected Order Behavior + * - None + - None + - random order + * - None + - True + - random order + * - None + - False + - sequential order + * - Sampler object + - None + - order defined by sampler + * - Sampler object + - True + - not allowed + * - Sampler object + - False + - not allowed + + Examples: + >>> wider_face_dir = "/path/to/wider_face_dataset" + >>> + >>> # Read 3 samples from WIDERFace dataset + >>> dataset = ds.WIDERFaceDataset(dataset_dir=wider_face_dir, num_samples=3) + + About WIDERFace dataset: + + The WIDERFace database of face images has a training set of 12,880 samples, a testing set of 16,097 samples + and a validation set of 3,226 samples. It is a subset of a larger set available from the WIDER dataset. + + The following is the original WIDERFace dataset structure. + You can unzip the dataset files into this directory structure and read by MindSpore's API. + + .. code-block:: + + . + └── wider_face_dir + ├── WIDER_test + │ └── images + │ ├── 0--Parade + │ │ ├── 0_Parade_marchingband_1_9.jpg + │ │ ├── ... + │ ├── 1--Handshaking + │ ├── ... 
+ ├── WIDER_train + │ └── images + │ ├── 0--Parade + │ │ ├── 0_Parade_marchingband_1_11.jpg + │ │ ├── ... + │ ├──1--Handshaking + │ ├──... + ├── WIDER_val + │ └── images + │ ├── 0--Parade + │ │ ├── 0_Parade_marchingband_1_102.jpg + │ │ ├── ... + │ ├──1--Handshaking + │ ├──... + └── wider_face_split + ├── wider_face_test_filelist.txt + ├── wider_face_train_bbx_gt.txt + └── wider_face_val_bbx_gt.txt + + Citation: + + .. code-block:: + + @inproceedings{2016WIDER, + title={WIDER FACE: A Face Detection Benchmark}, + author={Yang, S. and Luo, P. and Loy, C. C. and Tang, X.}, + booktitle={IEEE}, + pages={5525-5533}, + year={2016}, + } + """ + + @check_wider_face_dataset + def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, + decode=False, sampler=None, num_shards=None, shard_id=None, cache=None): + super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, + shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) + + self.dataset_dir = dataset_dir + self.usage = replace_none(usage, "all") + self.decode = replace_none(decode, False) + + def parse(self, children=None): + return cde.WIDERFaceNode(self.dataset_dir, self.usage, self.decode, self.sampler)
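The `_SVHNDataset` / `SVHNDataset` pairing above illustrates the general recipe for exposing a random-accessible Python object through `GeneratorDataset`: implement `__getitem__` and `__len__`, then pass the object and its column names to the constructor. The following is a minimal sketch of the same pattern using a hypothetical in-memory source instead of SVHN `.mat` files; the class name and the random data are illustrative only and are not part of this patch.

```python
import numpy as np
import mindspore.dataset as ds


class _InMemorySource:
    """Random-accessible source: __getitem__/__len__ are all GeneratorDataset
    needs to drive samplers, shuffling and sharding (hypothetical example data)."""

    def __init__(self, num_rows=100):
        self.images = np.random.randint(0, 256, (num_rows, 32, 32, 3), dtype=np.uint8)
        self.labels = np.random.randint(0, 10, (num_rows,)).astype(np.uint32)

    def __getitem__(self, index):
        # One row per access; each returned element becomes one output column.
        return self.images[index], self.labels[index]

    def __len__(self):
        return len(self.images)


source = _InMemorySource()
dataset = ds.GeneratorDataset(source, column_names=["image", "label"], shuffle=True)
for row in dataset.create_dict_iterator(output_numpy=True):
    print(row["image"].shape, row["label"])  # (32, 32, 3) and a uint32 scalar
    break
```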
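For the C++-backed readers such as `VOCDataset`, decoding and other transforms are normally applied with `map` on top of the source node. The snippet below is a sketch only: the directory path is a placeholder, and it assumes the MindSpore 1.x `mindspore.dataset.vision.c_transforms` module for `Decode` and `Resize`; column names follow the Detection-task columns documented above.

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision

voc_dataset_dir = "/path/to/voc_dataset_directory"  # placeholder path

# Detection rows come back with encoded images (decode=False by default);
# decode and resize them inside the pipeline rather than in user code.
dataset = ds.VOCDataset(dataset_dir=voc_dataset_dir, task="Detection", usage="train", shuffle=True)
dataset = dataset.map(operations=[vision.Decode(), vision.Resize((300, 300))],
                      input_columns=["image"])

for row in dataset.create_dict_iterator(output_numpy=True):
    print(row["image"].shape, row["bbox"].shape)  # e.g. (300, 300, 3) and (n, 4)
    break
```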