forked from mindspore-Ecosystem/mindspore
!6444 dataset API docstring: Update datasets.py and config.py
Merge pull request !6444 from cathwong/ckw_api_dataset_examples
This commit is contained in: commit 4d58c25146
@@ -13,7 +13,8 @@
# limitations under the License.
# ==============================================================================
"""
The configuration manager.
The configuration module provides various functions to set and get the supported
configuration parameters, and read a configuration file.
"""
import random
import numpy
@@ -35,18 +36,20 @@ def set_seed(seed):
Note:
This set_seed function sets the seed in the Python random library and numpy.random library
for deterministic Python augmentations using randomness. This set_seed function should
be called with every iterator created to reset the random seed. In our pipeline this
be called with every iterator created to reset the random seed. In the pipeline, this
does not guarantee deterministic results with num_parallel_workers > 1.

Args:
seed(int): seed to be set.
seed(int): Seed to be set.

Raises:
ValueError: If seed is invalid (< 0 or > MAX_UINT_32).

Examples:
>>> import mindspore.dataset as ds
>>> # sets the new seed value, now operators with a random seed will use new seed value.
>>>
>>> # Set a new global configuration value for the seed value.
>>> # Operations with randomness will use the seed value to generate random values.
>>> ds.config.set_seed(1000)
"""
if seed < 0 or seed > UINT32_MAX:
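A minimal sketch of the intended call pattern, assuming ds.config.get_seed is the matching getter (it is not shown in this hunk):

    import mindspore.dataset as ds

    ds.config.set_seed(1000)             # fix the global seed for reproducible pipelines
    assert ds.config.get_seed() == 1000  # operations with randomness now derive values from this seed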
@@ -72,14 +75,15 @@ def set_prefetch_size(size):
Set the number of rows to be prefetched.

Args:
size (int): total number of rows to be prefetched.
size (int): Total number of rows to be prefetched.

Raises:
ValueError: If prefetch_size is invalid (<= 0 or > MAX_INT_32).

Examples:
>>> import mindspore.dataset as ds
>>> # sets the new prefetch value.
>>>
>>> # Set a new global configuration value for the prefetch size.
>>> ds.config.set_prefetch_size(1000)
"""
if size <= 0 or size > INT32_MAX:
@@ -102,18 +106,20 @@ def set_num_parallel_workers(num):
Set the default number of parallel workers.

Args:
num (int): number of parallel workers to be used as a default for each operation.
num (int): Number of parallel workers to be used as a default for each operation.

Raises:
ValueError: If num_parallel_workers is invalid (<= 0 or > MAX_INT_32).

Examples:
>>> import mindspore.dataset as ds
>>> # sets the new parallel_workers value, now parallel dataset operators will run with 8 workers.
>>>
>>> # Set a new global configuration value for the number of parallel workers.
>>> # Now parallel dataset operators will run with 8 workers.
>>> ds.config.set_num_parallel_workers(8)
"""
if num <= 0 or num > INT32_MAX:
raise ValueError("Num workers given is not within the required range.")
raise ValueError("Number of parallel workers given is not within the required range.")
_config.set_num_parallel_workers(num)
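A short sketch of how the global default interacts with a per-operation setting; the num_parallel_workers argument on ImageFolderDataset is taken from examples later in this diff:

    import mindspore.dataset as ds

    ds.config.set_num_parallel_workers(8)  # default worker count for every operation
    # A single operation can still override the global default:
    data = ds.ImageFolderDataset("/path/to/imagefolder_directory", num_parallel_workers=4)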
@@ -129,17 +135,18 @@ def get_num_parallel_workers():

def set_monitor_sampling_interval(interval):
"""
Set the default interval(ms) of monitor sampling.
Set the default interval (in milliseconds) for monitor sampling.

Args:
interval (int): interval(ms) to be used to performance monitor sampling.
interval (int): Interval (in milliseconds) to be used for performance monitor sampling.

Raises:
ValueError: If interval is invalid (<= 0 or > MAX_INT_32).

Examples:
>>> import mindspore.dataset as ds
>>> # sets the new interval value.
>>>
>>> # Set a new global configuration value for the monitor sampling interval.
>>> ds.config.set_monitor_sampling_interval(100)
"""
if interval <= 0 or interval > INT32_MAX:
@@ -152,7 +159,7 @@ def get_monitor_sampling_interval():
Get the default interval of performance monitor sampling.

Returns:
Interval: interval(ms) of performance monitor sampling.
Interval: interval (in milliseconds) for performance monitor sampling.
"""
return _config.get_monitor_sampling_interval()
@@ -163,18 +170,19 @@ def set_callback_timeout(timeout):
In case of a deadlock, the wait function will exit after the timeout period.

Args:
timeout (int): timeout(s) to be used to end teh wait in DSWaitedCallback in case of a deadlock.
timeout (int): Timeout (in seconds) to be used to end the wait in DSWaitedCallback in case of a deadlock.

Raises:
ValueError: If timeout is invalid (<= 0 or > MAX_INT_32).

Examples:
>>> import mindspore.dataset as ds
>>> # sets the new timout value.
>>>
>>> # Set a new global configuration value for the timeout value.
>>> ds.config.set_callback_timeout(100)
"""
if timeout <= 0 or timeout > INT32_MAX:
raise ValueError("timeout given is not within the required range.")
raise ValueError("Timeout given is not within the required range.")
_config.set_callback_timeout(timeout)
@@ -201,25 +209,23 @@ def __str__():

def load(file):
"""
Load configuration from a file.
Load configurations from a file.

Args:
file (str): path the config file to be loaded.
file (str): Path of the configuration file to be loaded.

Raises:
RuntimeError: If file is invalid and parsing fails.

Examples:
>>> import mindspore.dataset as ds
>>> # sets the default value according to values in configuration file.
>>>
>>> # Set new default configuration values according to values in the configuration file.
>>> ds.config.load("path/to/config/file")
>>> # example config file:
>>> # {
>>> #     "logFilePath": "/tmp",
>>> #     "rowsPerBuffer": 32,
>>> #     "numParallelWorkers": 4,
>>> #     "workerConnectorSize": 16,
>>> #     "opConnectorSize": 16,
>>> #     "seed": 5489,
>>> #     "monitorSamplingInterval": 30
>>> # }
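A hedged end-to-end sketch of load(), writing a subset of the example JSON above to a temporary file first (whether every key is mandatory is an assumption):

    import json
    import tempfile
    import mindspore.dataset as ds

    cfg = {"numParallelWorkers": 4, "seed": 5489, "monitorSamplingInterval": 30}
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(cfg, f)   # persist the configuration to disk
    ds.config.load(f.name)  # global defaults now follow the file's values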
@@ -84,7 +84,7 @@ def zip(datasets):
>>> ds1 = ds.ImageFolderDataset(dataset_dir1, num_parallel_workers=8)
>>> ds2 = ds.ImageFolderDataset(dataset_dir2, num_parallel_workers=8)
>>>
>>> # creates a dataset which is the combination of ds1 and ds2
>>> # Create a dataset which is the combination of ds1 and ds2
>>> data = ds.zip((ds1, ds2))
"""
if len(datasets) <= 1:
@@ -218,18 +218,19 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>>
>>> # creates a dataset where every 100 rows is combined into a batch
>>> # Create a dataset where every 100 rows is combined into a batch
>>> # and drops the last incomplete batch if there is one.
>>> column_names = ["col1", "col2"]
>>> bucket_boundaries = [5, 10]
>>> bucket_batch_sizes = [5, 1, 1]
>>> element_length_function = (lambda col1, col2: max(len(col1), len(col2)))
>>>
>>> # will pad col1 to shape [2, bucket_boundaries[i]] where i is the
>>> # Will pad col1 to shape [2, bucket_boundaries[i]] where i is the
>>> # index of the bucket that is currently being batched.
>>> # will pad col2 to a shape where each dimension is the longest in all
>>> # Will pad col2 to a shape where each dimension is the longest in all
>>> # the elements currently being batched.
>>> pad_info = {"col1": ([2, None], -1)}
>>> pad_to_bucket_boundary = True
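The hunk cuts off before the call itself; a hedged sketch of how the variables above would feed bucket_batch_by_length (argument order assumed from the parameter names):

    data = data.bucket_batch_by_length(column_names,
                                       bucket_boundaries,
                                       bucket_batch_sizes,
                                       element_length_function,
                                       pad_info,
                                       pad_to_bucket_boundary)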
@@ -291,8 +292,10 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>> # creates a dataset where every 100 rows is combined into a batch
>>>
>>> # Create a dataset where every 100 rows is combined into a batch
>>> # and drops the last incomplete batch if there is one.
>>> data = data.batch(100, True)
"""
@@ -314,6 +317,7 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>> data = data.sync_wait("callback1")
>>> data = data.batch(batch_size)
@@ -349,11 +353,12 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>> # data is an instance of Dataset object
>>> # optionally set the seed for the first epoch
>>>
>>> # data is an instance of Dataset object.
>>> # Optionally set the seed for the first epoch
>>> ds.config.set_seed(58)
>>>
>>> # creates a shuffled dataset using a shuffle buffer of size 4
>>> # Create a shuffled dataset using a shuffle buffer of size 4
>>> data = data.shuffle(4)
"""
return ShuffleDataset(self, buffer_size)
@@ -375,12 +380,13 @@ class Dataset:
Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>> # declare a function which returns a Dataset object
>>>
>>> # Declare a function which returns a Dataset object
>>> def flat_map_func(x):
>>>     data_dir = text.to_str(x[0])
>>>     d = ds.ImageFolderDataset(data_dir)
>>>     return d
>>> # data is a Dataset object
>>> # data is an instance of a Dataset object.
>>> data = ds.TextFileDataset(DATA_FILE)
>>> data = data.flat_map(flat_map_func)
@@ -460,16 +466,17 @@ class Dataset:
>>> import mindspore.dataset.vision.c_transforms as c_transforms
>>>
>>> # data is an instance of Dataset which has 2 columns, "image" and "label".
>>> # ds_pyfunc is an instance of Dataset which has 3 columns, "col0", "col1", and "col2". Each column is
>>> # a 2d array of integers.
>>> # ds_pyfunc is an instance of Dataset which has 3 columns, "col0", "col1", and "col2".
>>> # Each column is a 2D array of integers.
>>>
>>> # This config is a global setting, meaning that all future operations which
>>> # uses this config value will use 2 worker threads, unless if specified
>>> # otherwise in their constructor. set_num_parallel_workers can be called
>>> # again later if a different number of worker threads are needed.
>>> # Set the global configuration value for num_parallel_workers to be 2.
>>> # Operations which use this configuration value will use 2 worker threads,
>>> # unless otherwise specified in the operator's constructor.
>>> # set_num_parallel_workers can be called again later if a different
>>> # global configuration value for the number of worker threads is desired.
>>> ds.config.set_num_parallel_workers(2)
>>>
>>> # Two operations, which takes 1 column for input and outputs 1 column.
>>> # Define two operations, where each operation accepts 1 input column and outputs 1 column.
>>> decode_op = c_transforms.Decode(rgb_format=True)
>>> random_jitter_op = c_transforms.RandomColorAdjust((0.8, 0.8), (1, 1), (1, 1), (0, 0))
>>>
@@ -478,12 +485,12 @@ class Dataset:
>>> operations = [decode_op]
>>> input_columns = ["image"]
>>>
>>> # Applies decode_op on column "image". This column will be replaced by the outputed
>>> # Apply decode_op on column "image". This column will be replaced by the outputted
>>> # column of decode_op. Since column_order is not provided, both columns "image"
>>> # and "label" will be propagated to the child node in their original order.
>>> ds_decoded = data.map(operations, input_columns)
>>>
>>> # Rename column "image" to "decoded_image"
>>> # Rename column "image" to "decoded_image".
>>> output_columns = ["decoded_image"]
>>> ds_decoded = data.map(operations, input_columns, output_columns)
>>>
@@ -501,7 +508,7 @@ class Dataset:
>>> output_columns = ["decoded_image"]
>>> ds_decoded = data.map(operations, input_columns, output_columns, column_order)
>>>
>>> # Simple example using pyfunc. Renaming columns and specifying column order
>>> # A simple example using pyfunc: Renaming columns and specifying column order
>>> # work in the same way as the previous examples.
>>> input_columns = ["col0"]
>>> operations = [(lambda x: x + 1)]
@@ -515,7 +522,7 @@ class Dataset:
>>>
>>> input_columns = ["image"]
>>>
>>> # Creates a dataset where the images are decoded, then randomly color jittered.
>>> # Create a dataset where the images are decoded, then randomly color jittered.
>>> # decode_op takes column "image" as input and outputs one column. The column
>>> # outputted by decode_op is passed as input to random_jitter_op.
>>> # random_jitter_op will output one column. Column "image" will be replaced by
@@ -524,13 +531,13 @@ class Dataset:
>>> # columns will remain the same.
>>> ds_mapped = data.map(operations, input_columns)
>>>
>>> # Creates a dataset that is identical to ds_mapped, except the column "image"
>>> # Create a dataset that is identical to ds_mapped, except the column "image"
>>> # that is outputted by random_jitter_op is renamed to "image_transformed".
>>> # Specifying column order works in the same way as examples in 1).
>>> output_columns = ["image_transformed"]
>>> ds_mapped_and_renamed = data.map(operations, input_columns, output_columns)
>>>
>>> # Multiple operations using pyfunc. Renaming columns and specifying column order
>>> # Multiple operations using pyfunc: Renaming columns and specifying column order
>>> # work in the same way as examples in 1).
>>> input_columns = ["col0"]
>>> operations = [(lambda x: x + x), (lambda x: x - 1)]
@@ -543,15 +550,15 @@ class Dataset:
>>> # operations[1] is a lambda that takes 3 columns as input and outputs 1 column.
>>> # operations[2] is a lambda that takes 1 column as input and outputs 4 columns.
>>> #
>>> # Note: the number of output columns of operation[i] must equal the number of
>>> # Note: The number of output columns of operation[i] must equal the number of
>>> # input columns of operation[i+1]. Otherwise, this map call will also result
>>> # in an error.
>>> operations = [(lambda x, y: (x, x + y, x + y + 1)),
>>>               (lambda x, y, z: x * y * z),
>>>               (lambda x: (x % 2, x % 3, x % 5, x % 7))]
>>>
>>> # Note: because the number of input columns is not the same as the number of
>>> # output columns, the output_columns and column_order parameter must be
>>> # Note: Since the number of input columns is not the same as the number of
>>> # output columns, the output_columns and column_order parameters must be
>>> # specified. Otherwise, this map call will also result in an error.
>>> input_columns = ["col2", "col0"]
>>> output_columns = ["mod2", "mod3", "mod5", "mod7"]
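A self-contained sketch of the one-column-to-four-columns case described above, built on NumpySlicesDataset (documented later in this diff) so it can run end to end:

    import mindspore.dataset as ds

    data = ds.NumpySlicesDataset({"col0": [14, 15, 16]}, shuffle=False)
    # One input column becomes four output columns, so both output_columns
    # and column_order must be supplied.
    data = data.map(operations=[lambda x: (x % 2, x % 3, x % 5, x % 7)],
                    input_columns=["col0"],
                    output_columns=["mod2", "mod3", "mod5", "mod7"],
                    column_order=["mod2", "mod3", "mod5", "mod7"])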
@@ -614,15 +621,17 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>> # creates a dataset where the dataset is repeated for 50 epochs
>>>
>>> # Create a dataset where the dataset is repeated for 50 epochs
>>> repeated = data.repeat(50)
>>>
>>> # creates a dataset where each epoch is shuffled individually
>>> # Create a dataset where each epoch is shuffled individually
>>> shuffled_and_repeated = data.shuffle(10)
>>> shuffled_and_repeated = shuffled_and_repeated.repeat(50)
>>>
>>> # creates a dataset where the dataset is first repeated for
>>> # Create a dataset where the dataset is first repeated for
>>> # 50 epochs before shuffling. The shuffle operator will treat
>>> # the entire 50 epochs as one big dataset.
>>> repeat_and_shuffle = data.repeat(50)
@@ -645,8 +654,9 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>> # creates a dataset which skips first 3 elements from data
>>> # Create a dataset which skips first 3 elements from data
>>> data = data.skip(3)
"""
return SkipDataset(self, count)
@@ -670,8 +680,9 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>> # creates a dataset where the dataset including 50 elements.
>>> # Create a dataset where the dataset includes 50 elements.
>>> data = data.take(50)
"""
if count == -1:
@@ -781,11 +792,11 @@ class Dataset:
Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/text_file.txt"
>>> dataset_files = "/path/to/text_file/*"
>>>
>>> # TextFileDataset is not a mappable dataset, so this non optimized split will be called.
>>> # many datasets have shuffle on by default, set shuffle to False if split will be called!
>>> data = ds.TextFileDataset(dataset_dir, shuffle=False)
>>> # TextFileDataset is not a mappable dataset, so this non-optimized split will be called.
>>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
>>> data = ds.TextFileDataset(dataset_files, shuffle=False)
>>> train, test = data.split([0.9, 0.1])
"""
if self.is_shuffled():
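The docstring above warns about shuffled inputs; a fully deterministic variant is sketched here (the randomize keyword is assumed from this API family and does not appear in the hunk):

    import mindspore.dataset as ds

    data = ds.TextFileDataset("/path/to/text_file/*", shuffle=False)
    train, test = data.split([0.9, 0.1], randomize=False)  # sequential 90/10 split, no randomness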
@@ -829,8 +840,9 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # ds1 and ds2 are instances of Dataset object
>>> # creates a dataset which is the combination of ds1 and ds2
>>> # Create a dataset which is the combination of ds1 and ds2
>>> data = ds1.zip(ds2)
"""
if isinstance(datasets, tuple):
@@ -858,10 +870,12 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # ds1 and ds2 are instances of Dataset object
>>> # creates a dataset by concatenating ds1 and ds2 with "+" operator
>>>
>>> # Create a dataset by concatenating ds1 and ds2 with "+" operator
>>> data1 = ds1 + ds2
>>> # creates a dataset by concatenating ds1 and ds2 with concat operation
>>> # Create a dataset by concatenating ds1 and ds2 with concat operation
>>> data1 = ds1.concat(ds2)
"""
if isinstance(datasets, Dataset):
@@ -886,11 +900,12 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object.
>>> input_columns = ["input_col1", "input_col2", "input_col3"]
>>> output_columns = ["output_col1", "output_col2", "output_col3"]
>>>
>>> # creates a dataset where input_col1 is renamed to output_col1, and
>>> # Create a dataset where input_col1 is renamed to output_col1, and
>>> # input_col2 is renamed to output_col2, and input_col3 is renamed
>>> # to output_col3.
>>> data = data.rename(input_columns=input_columns, output_columns=output_columns)
@@ -914,10 +929,11 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object
>>> columns_to_project = ["column3", "column1", "column2"]
>>>
>>> # creates a dataset that consist of column3, column1, column2
>>> # Create a dataset that consists of column3, column1, column2
>>> # in that order, regardless of the original order of columns.
>>> data = data.project(columns=columns_to_project)
"""
@@ -945,12 +961,15 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object
>>> # declare an apply_func function which returns a Dataset object
>>>
>>> # Declare an apply_func function which returns a Dataset object
>>> def apply_func(ds):
>>>     ds = ds.batch(2)
>>>     return ds
>>> # use apply to call apply_func
>>>
>>> # Use apply to call apply_func
>>> data = data.apply(apply_func)

Raises:
@@ -1150,8 +1169,10 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object
>>> # create an iterator
>>>
>>> # Create an iterator
>>> # The columns in the data obtained by the iterator will not be changed.
>>> iterator = data.create_tuple_iterator()
>>> for item in iterator:
@@ -1171,8 +1192,6 @@ class Dataset:
Args:
num_epochs (int, optional): Maximum number of epochs that iterator can be iterated
(default=-1, iterator can be iterated infinite number of epochs).
num_epochs (int, optional): maximum epochs that iterator can be iteratered,
if num_epochs = -1, iterator can be iteratered infinite epochs (default=-1)
output_numpy (bool, optional): Whether or not to output NumPy datatype,
if output_numpy=False, iterator will output MSTensor (default=False).
@@ -1181,14 +1200,15 @@ class Dataset:

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # data is an instance of Dataset object
>>>
>>> # create an iterator
>>> # The columns in the data obtained by the iterator might be changed.
>>> iterator = data.create_dict_iterator()
>>> for item in iterator:
>>>     # print the data in column1
>>>     print(item["column1"])
"""
if self._noop_mode():
    return DummyIterator(self, 'dict')
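Tying the two arguments documented above together, a minimal sketch:

    iterator = data.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item in iterator:
        print(item["column1"])  # printed as a NumPy array rather than an MSTensor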
@@ -1426,10 +1446,10 @@ class MappableDataset(SourceDataset):
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/imagefolder_directory"
>>> # a SequentialSampler is created by default
>>> # Note: A SequentialSampler is created by default
>>> data = ds.ImageFolderDataset(dataset_dir)
>>>
>>> # use a DistributedSampler instead of the SequentialSampler
>>> # Use a DistributedSampler instead of the SequentialSampler
>>> new_sampler = ds.DistributedSampler(10, 2)
>>> data.use_sampler(new_sampler)
"""
@@ -1514,15 +1534,15 @@ class MappableDataset(SourceDataset):
>>>
>>> dataset_dir = "/path/to/imagefolder_directory"
>>>
>>> # many datasets have shuffle on by default, set shuffle to False if split will be called!
>>> # Since many datasets have shuffle on by default, set shuffle to False if split will be called!
>>> data = ds.ImageFolderDataset(dataset_dir, shuffle=False)
>>>
>>> # sets the seed, and tells split to use this seed when randomizing. This
>>> # is needed because we are sharding later
>>> # Set the seed, and tell split to use this seed when randomizing.
>>> # This is needed because sharding will be done later
>>> ds.config.set_seed(58)
>>> train, test = data.split([0.9, 0.1])
>>>
>>> # if we want to shard the train dataset, we can use a DistributedSampler
>>> # To shard the train dataset, use a DistributedSampler
>>> train_sampler = ds.DistributedSampler(10, 2)
>>> train.use_sampler(train_sampler)
"""
@@ -1990,7 +2010,7 @@ class _PythonCallable:

class MapDataset(DatasetOp):
"""
The result of applying Map operator to the input Dataset.
The result of applying the Map operator to the input Dataset.

Args:
input_dataset (Dataset): Input Dataset to be mapped.
@@ -2756,14 +2776,19 @@ class ImageFolderDataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>> # path to imagefolder directory. This directory needs to contain sub-directories which contain the images
>>>
>>> # Set path to the imagefolder directory.
>>> # This directory needs to contain sub-directories which contain the images
>>> dataset_dir = "/path/to/imagefolder_directory"
>>> # 1) read all samples (image files) in dataset_dir with 8 threads
>>>
>>> # 1) Read all samples (image files) in dataset_dir with 8 threads
>>> imagefolder_dataset = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
>>> # 2) read all samples (image files) from folder cat and folder dog with label 0 and 1
>>> imagefolder_dataset = ds.ImageFolderDataset(dataset_dir,class_indexing={"cat":0,"dog":1})
>>> # 3) read all samples (image files) in dataset_dir with extensions .JPEG and .png (case sensitive)
>>> imagefolder_dataset = ds.ImageFolderDataset(dataset_dir, extensions=[".JPEG",".png"])
>>>
>>> # 2) Read all samples (image files) from folder cat and folder dog with label 0 and 1
>>> imagefolder_dataset = ds.ImageFolderDataset(dataset_dir, class_indexing={"cat":0, "dog":1})
>>>
>>> # 3) Read all samples (image files) in dataset_dir with extensions .JPEG and .png (case sensitive)
>>> imagefolder_dataset = ds.ImageFolderDataset(dataset_dir, extensions=[".JPEG", ".png"])
"""

@check_imagefolderdataset
@@ -2912,10 +2937,11 @@ class MnistDataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/mnist_folder"
>>> # 1) read 3 samples from mnist_dataset
>>> # Read 3 samples from MNIST dataset
>>> mnist_dataset = ds.MnistDataset(dataset_dir=dataset_dir, num_samples=3)
>>> # in mnist_dataset dataset, each dictionary has keys "image" and "label"
>>> # Note: In mnist_dataset dataset, each dictionary has keys "image" and "label"
"""

@check_mnist_cifar_dataset
@@ -3418,35 +3444,39 @@ class GeneratorDataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # 1) Multidimensional generator function as callable input
>>> def generator_md():
>>> def GeneratorMD():
>>>     for i in range(64):
>>>         yield (np.array([[i, i + 1], [i + 2, i + 3]]),)
>>> # create multi_dimension_generator_dataset with GeneratorMD and column name "multi_dimensional_data"
>>> multi_dimension_generator_dataset = ds.GeneratorDataset(generator_md, ["multi_dimensional_data"])
>>> # Create multi_dimension_generator_dataset with GeneratorMD and column name "multi_dimensional_data"
>>> multi_dimension_generator_dataset = ds.GeneratorDataset(GeneratorMD, ["multi_dimensional_data"])
>>>
>>> # 2) Multi-column generator function as callable input
>>> def generator_mc(maxid = 64):
>>> def GeneratorMC(maxid = 64):
>>>     for i in range(maxid):
>>>         yield (np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]))
>>> # create multi_column_generator_dataset with GeneratorMC and column names "col1" and "col2"
>>> multi_column_generator_dataset = ds.GeneratorDataset(generator_mc, ["col1", "col2"])
>>> # Create multi_column_generator_dataset with GeneratorMC and column names "col1" and "col2"
>>> multi_column_generator_dataset = ds.GeneratorDataset(GeneratorMC, ["col1", "col2"])
>>>
>>> # 3) Iterable dataset as iterable input
>>> class MyIterable():
>>>     def __iter__(self):
>>>         return # User implementation
>>> # create iterable_generator_dataset with MyIterable object
>>> # Create iterable_generator_dataset with MyIterable object
>>> iterable_generator_dataset = ds.GeneratorDataset(MyIterable(), ["col1"])
>>> # 4) Random accessible dataset as Random accessible input
>>>
>>> # 4) Random accessible dataset as random accessible input
>>> class MyRA():
>>>     def __getitem__(self, index):
>>>         return # User implementation
>>> # create ra_generator_dataset with MyRA object
>>> # Create ra_generator_dataset with MyRA object
>>> ra_generator_dataset = ds.GeneratorDataset(MyRA(), ["col1"])
>>> # List/Dict/Tuple is also random accessible
>>> list_generator = ds.GeneratorDataset([(np.array(0),), (np.array(1),), (np.array(2),)], ["col1"])
>>>
>>> # 5) Built-in Sampler
>>> my_generator = ds.GeneratorDataset(my_ds, ["img", "label"], sampler=samplers.RandomSampler())
>>>
"""

@check_generatordataset
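A runnable distillation of case 1) above; note the examples use np without showing the import, so it is spelled out here:

    import numpy as np
    import mindspore.dataset as ds

    def generator_md():
        for i in range(64):
            yield (np.array([[i, i + 1], [i + 2, i + 3]]),)  # one row per yield, as a tuple

    data = ds.GeneratorDataset(generator_md, ["multi_dimensional_data"])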
@@ -3602,15 +3632,19 @@ class TFRecordDataset(SourceDataset):
Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.common.dtype as mstype
>>>
>>> dataset_files = ["/path/to/1", "/path/to/2"] # contains one or multiple TFRecord files
>>> # 1) get all rows from dataset_files with no explicit schema:
>>>
>>> # 1) Get all rows from dataset_files with no explicit schema
>>> # The meta-data in the first row will be used as a schema.
>>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files)
>>> # 2) get all rows from dataset_files with user-defined schema:
>>>
>>> # 2) Get all rows from dataset_files with user-defined schema
>>> schema = ds.Schema()
>>> schema.add_column('col_1d', de_type=mstype.int64, shape=[2])
>>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema=schema)
>>> # 3) get all rows from dataset_files with schema file "./schema.json":
>>>
>>> # 3) Get all rows from dataset_files with schema file "./schema.json"
>>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema="./schema.json")
"""
@@ -3773,10 +3807,14 @@ class ManifestDataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_file = "/path/to/manifest_file.manifest"
>>> # 1) read all samples specified in manifest_file dataset with 8 threads for training:
>>>
>>> # 1) Read all samples specified in manifest_file dataset with 8 threads for training
>>> manifest_dataset = ds.ManifestDataset(dataset_file, usage="train", num_parallel_workers=8)
>>> # 2) reads samples (specified in manifest_file.manifest) for shard 0 in a 2-way distributed training setup:
>>>
>>> # 2) Read samples (specified in manifest_file.manifest) for shard 0
>>> # in a 2-way distributed training setup
>>> manifest_dataset = ds.ManifestDataset(dataset_file, num_shards=2, shard_id=0)
"""
@@ -3951,14 +3989,19 @@ class Cifar10Dataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/cifar10_dataset_directory"
>>> # 1) get all samples from CIFAR10 dataset in sequence:
>>> dataset = ds.Cifar10Dataset(dataset_dir=dataset_dir,shuffle=False)
>>> # 2) randomly select 350 samples from CIFAR10 dataset:
>>> dataset = ds.Cifar10Dataset(dataset_dir=dataset_dir,num_samples=350, shuffle=True)
>>> # 3) get samples from CIFAR10 dataset for shard 0 in a 2 way distributed training:
>>> dataset = ds.Cifar10Dataset(dataset_dir=dataset_dir,num_shards=2,shard_id=0)
>>> # in CIFAR10 dataset, each dictionary has keys "image" and "label"
>>>
>>> # 1) Get all samples from CIFAR10 dataset in sequence
>>> dataset = ds.Cifar10Dataset(dataset_dir=dataset_dir, shuffle=False)
>>>
>>> # 2) Randomly select 350 samples from CIFAR10 dataset
>>> dataset = ds.Cifar10Dataset(dataset_dir=dataset_dir, num_samples=350, shuffle=True)
>>>
>>> # 3) Get samples from CIFAR10 dataset for shard 0 in a 2-way distributed training
>>> dataset = ds.Cifar10Dataset(dataset_dir=dataset_dir, num_shards=2, shard_id=0)
>>>
>>> # In CIFAR10 dataset, each dictionary has keys "image" and "label"
"""

@check_mnist_cifar_dataset
@@ -4093,12 +4136,16 @@ class Cifar100Dataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/cifar100_dataset_directory"
>>> # 1) get all samples from CIFAR100 dataset in sequence:
>>> cifar100_dataset = ds.Cifar100Dataset(dataset_dir=dataset_dir,shuffle=False)
>>> # 2) randomly select 350 samples from CIFAR100 dataset:
>>> cifar100_dataset = ds.Cifar100Dataset(dataset_dir=dataset_dir,num_samples=350, shuffle=True)
>>> # in CIFAR100 dataset, each dictionary has 3 keys: "image", "fine_label" and "coarse_label"
>>>
>>> # 1) Get all samples from CIFAR100 dataset in sequence
>>> cifar100_dataset = ds.Cifar100Dataset(dataset_dir=dataset_dir, shuffle=False)
>>>
>>> # 2) Randomly select 350 samples from CIFAR100 dataset
>>> cifar100_dataset = ds.Cifar100Dataset(dataset_dir=dataset_dir, num_samples=350, shuffle=True)
>>>
>>> # In CIFAR100 dataset, each dictionary has 3 keys: "image", "fine_label" and "coarse_label"
"""

@check_mnist_cifar_dataset
@@ -4265,7 +4312,8 @@ class Schema:
Example:
>>> import mindspore.dataset as ds
>>> import mindspore.common.dtype as mstype
>>> # create schema, specify column name, mindspore.dtype and shape of the column
>>>
>>> # Create schema; specify column name, mindspore.dtype and shape of the column
>>> schema = ds.Schema()
>>> schema.add_column('col1', de_type=mstype.int64, shape=[2])
"""
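A hedged extension of the example above: two columns in one schema, then fed to TFRecordDataset (which accepts schema=, as shown earlier in this diff); the column names and file path here are illustrative:

    schema = ds.Schema()
    schema.add_column('col1', de_type=mstype.int64, shape=[2])
    schema.add_column('label', de_type=mstype.int32, shape=[1])
    dataset = ds.TFRecordDataset(dataset_files=["/path/to/1"], schema=schema)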
@@ -4522,17 +4570,23 @@ class VOCDataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/voc_dataset_directory"
>>> # 1) read VOC data for segmenatation train
>>>
>>> # 1) Read VOC data for segmentation training
>>> voc_dataset = ds.VOCDataset(dataset_dir, task="Segmentation", usage="train")
>>> # 2) read VOC data for detection train
>>>
>>> # 2) Read VOC data for detection training
>>> voc_dataset = ds.VOCDataset(dataset_dir, task="Detection", usage="train")
>>> # 3) read all VOC dataset samples in dataset_dir with 8 threads in random order:
>>>
>>> # 3) Read all VOC dataset samples in dataset_dir with 8 threads in random order
>>> voc_dataset = ds.VOCDataset(dataset_dir, task="Detection", usage="train", num_parallel_workers=8)
>>> # 4) read then decode all VOC dataset samples in dataset_dir in sequence:
>>>
>>> # 4) Read then decode all VOC dataset samples in dataset_dir in sequence
>>> voc_dataset = ds.VOCDataset(dataset_dir, task="Detection", usage="train", decode=True, shuffle=False)
>>> # in VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target"
>>> # in VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation"
>>>
>>> # In VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target"
>>> # In VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation"
"""

@check_vocdataset
@@ -4722,17 +4776,23 @@ class CocoDataset(MappableDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/coco_dataset_directory/image_folder"
>>> annotation_file = "/path/to/coco_dataset_directory/annotation_folder/annotation.json"
>>> # 1) read COCO data for Detection task
>>>
>>> # 1) Read COCO data for Detection task
>>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Detection')
>>> # 2) read COCO data for Stuff task
>>>
>>> # 2) Read COCO data for Stuff task
>>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Stuff')
>>> # 3) read COCO data for Panoptic task
>>>
>>> # 3) Read COCO data for Panoptic task
>>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Panoptic')
>>> # 4) read COCO data for Keypoint task
>>>
>>> # 4) Read COCO data for Keypoint task
>>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Keypoint')
>>> # in COCO dataset, each dictionary has keys "image" and "annotation"
>>>
>>> # In COCO dataset, each dictionary has keys "image" and "annotation"
"""

@check_cocodataset
@@ -4857,6 +4917,12 @@ class CelebADataset(MappableDataset):
into (default=None).
shard_id (int, optional): The shard ID within num_shards (default=None). This
argument can only be specified when num_shards is also specified.

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_dir = "/path/to/celeba_directory"
>>> dataset = ds.CelebADataset(dataset_dir=dataset_dir, usage='train')
"""

@check_celebadataset
@@ -4976,6 +5042,7 @@ class CLUEDataset(SourceDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files
>>> dataset = ds.CLUEDataset(dataset_files=dataset_files, task='AFQMC', usage='train')
"""
@@ -5162,7 +5229,7 @@ class CLUEDataset(SourceDataset):

class CSVDataset(SourceDataset):
"""
A source dataset that reads and parses CSV datasets.
A source dataset that reads and parses comma-separated values (CSV) datasets.

Args:
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search
@@ -5192,6 +5259,7 @@ class CSVDataset(SourceDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files
>>> dataset = ds.CSVDataset(dataset_files=dataset_files, column_names=['col1', 'col2', 'col3', 'col4'])
"""
@@ -5288,6 +5356,7 @@ class TextFileDataset(SourceDataset):
argument can only be specified when num_shards is also specified.
Examples:
>>> import mindspore.dataset as ds
>>>
>>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files
>>> dataset = ds.TextFileDataset(dataset_files=dataset_files)
"""
@@ -5455,10 +5524,10 @@ class NumpySlicesDataset(GeneratorDataset):

Args:
data (Union[list, tuple, dict]): Input of given data. Supported data types include: list, tuple, dict and other
NumPy formats. Input data will be sliced in first dimension and generate many rows. Large data is not
recommended to be loaded in this way as data is loading into memory.
NumPy formats. Input data will be sliced along the first dimension and generate additional rows.
Large data is not recommended to be loaded in this way as data is loading into memory.
column_names (list[str], optional): List of column names of the dataset (default=None). If column_names is not
provided, when data is dict, column_names will be its key, otherwise it will be like column_1, column_2 ...
provided, when data is dict, column_names will be its keys, otherwise it will be like column_1, column_2 ...
num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images).
num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1).
shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required.
@@ -5472,16 +5541,20 @@ class NumpySlicesDataset(GeneratorDataset):

Examples:
>>> import mindspore.dataset as ds
>>>
>>> # 1) Input data can be a list
>>> data = [1, 2, 3]
>>> dataset1 = ds.NumpySlicesDataset(data, column_names=["column_1"])
>>> # 2) Input data can be a dict, and column_names will be its key
>>>
>>> # 2) Input data can be a dictionary, and column_names will be its keys
>>> data = {"a": [1, 2], "b": [3, 4]}
>>> dataset2 = ds.NumpySlicesDataset(data)
>>>
>>> # 3) Input data can be a tuple of lists (or NumPy arrays), each tuple element refers to data in each column
>>> data = ([1, 2], [3, 4], [5, 6])
>>> dataset3 = ds.NumpySlicesDataset(data, column_names=["column_1", "column_2", "column_3"])
>>> # 4) Load data from csv file
>>>
>>> # 4) Load data from CSV file
>>> import pandas as pd
>>> df = pd.read_csv("file.csv")
>>> dataset4 = ds.NumpySlicesDataset(dict(df), shuffle=False)
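A short follow-up to case 2): iterating the dictionary-built dataset shows that the columns are named after the keys (output_numpy as documented earlier in this diff):

    for row in dataset2.create_dict_iterator(output_numpy=True):
        print(row["a"], row["b"])  # columns named after the dict keys "a" and "b"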
@@ -223,7 +223,8 @@ class DistributedSampler(BuiltinSampler):
shard_id (int): Shard ID of the current shard within num_shards.
shuffle (bool, optional): If True, the indices are shuffled (default=True).
num_samples (int, optional): The number of samples to draw (default=None, all elements).
offset(int, optional): Offset from shard when the element of dataset is allocated (default=-1).
offset(int, optional): The starting sample ID where access to elements in the dataset begins (default=-1).

Examples:
>>> import mindspore.dataset as ds
>>>
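A hedged sketch of the sampler in use, pairing it with ImageFolderDataset's sampler argument (shown earlier in this diff):

    sampler = ds.DistributedSampler(num_shards=10, shard_id=2, shuffle=True)
    data = ds.ImageFolderDataset("/path/to/imagefolder_directory", sampler=sampler)
    # this process reads only shard 2 of the 10 shards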
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The module text.transforms is inheritted from _c_dataengine
The module text.transforms is inherited from _c_dataengine
and is implemented based on ICU4C and cppjieba in C++.
It's a high performance module to process NLP text.
Users can use Vocab to build their own dictionary,
@@ -23,26 +23,26 @@ and use Lookup to find the index of tokens in Vocab.
A constructor's arguments for every class in this module must be saved into the
class attributes (self.xxx) to support save() and load().

Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.text as text
>>>
>>> dataset_file = "path/to/text_file_path"
>>> # sentences as line data saved in a file
>>> dataset = ds.TextFileDataset(dataset_file, shuffle=False)
>>> # tokenize sentence to unicode characters
>>> tokenizer = text.UnicodeCharTokenizer()
>>> # load vocabulary from list
>>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您'])
>>> # lookup is an operation for mapping tokens to ids
>>> lookup = text.Lookup(vocab)
>>> dataset = dataset.map(operations=[tokenizer, lookup])
>>> for i in dataset.create_dict_iterator():
>>>     print(i)
>>> # if the text line in dataset_file is:
>>> #     深圳欢迎您
>>> # then the output will be:
>>> #     {'text': array([0, 1, 2, 3, 4], dtype=int32)}
"""
|
||||
import os
|
||||
import re
|
||||
|
|
|
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""
This module c_transforms provides common operations, including OneHotOp and TypeCast.
The module transforms.c_transforms provides common operations, including OneHotOp and TypeCast.
"""
from enum import IntEnum
import numpy as np
@@ -12,9 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
This module py_transforms is implemented basing on Python. It provides common
The module transforms.py_transforms is implemented based on Python. It provides common
operations including OneHotOp.
"""
from .validators import check_one_hot_op, check_compose_list, check_random_apply, check_transforms_list, \
@@ -80,11 +79,11 @@ class Compose:
>>> # create a dataset that reads all files in dataset_dir with 8 threads
>>> dataset = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
>>> # create a list of transformations to be applied to the image data
>>> transform = py_transform.Compose([py_vision.Decode(),
>>>                                   py_vision.RandomHorizontalFlip(0.5),
>>>                                   py_vision.ToTensor(),
>>>                                   py_vision.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
>>>                                   py_vision.RandomErasing()])
>>> transform = py_transforms.Compose([py_vision.Decode(),
>>>                                    py_vision.RandomHorizontalFlip(0.5),
>>>                                    py_vision.ToTensor(),
>>>                                    py_vision.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
>>>                                    py_vision.RandomErasing()])
>>> # apply the transform to the dataset through dataset.map()
>>> dataset = dataset.map(operations=transform, input_columns="image")
"""
@@ -22,26 +22,26 @@ to improve their training models.
A constructor's arguments for every class in this module must be saved into the
class attributes (self.xxx) to support save() and load().

Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.transforms.c_transforms as c_transforms
>>> import mindspore.dataset.vision.c_transforms as c_vision
>>> from mindspore.dataset.vision import Border, Inter
>>>
>>> dataset_dir = "path/to/imagefolder_directory"
>>> # create a dataset that reads all files in dataset_dir with 8 threads
>>> data1 = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
>>> # create a list of transformations to be applied to the image data
>>> transforms_list = [c_vision.Decode(),
>>>                    c_vision.Resize((256, 256), interpolation=Inter.LINEAR),
>>>                    c_vision.RandomCrop(200, padding_mode=Border.EDGE),
>>>                    c_vision.RandomRotation((0, 15)),
>>>                    c_vision.Normalize((100, 115.0, 121.0), (71.0, 68.0, 70.0)),
>>>                    c_vision.HWC2CHW()]
>>> onehot_op = c_transforms.OneHot(num_classes=10)
>>> # apply the transformation to the dataset through data1.map()
>>> data1 = data1.map(operations=transforms_list, input_columns="image")
>>> data1 = data1.map(operations=onehot_op, input_columns="label")
"""
|
||||
import numbers
|
||||
import mindspore._c_dataengine as cde
|
||||
|
|
|
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
The module vision.py_transforms is implemented based on Python PIL.
This module provides many kinds of image augmentations. It also provides
@@ -50,9 +49,9 @@ class ToTensor:
Convert the input NumPy image array or PIL image of shape (H, W, C) to a NumPy ndarray of shape (C, H, W).

Note:
The ranges of values in height and width dimension are converted from [0, 255] to [0.0, 1.0].
The values in the input arrays are rescaled from [0, 255] to [0.0, 1.0].
The type is cast to output_type (default NumPy float32).
The range of channel dimension remains the same.
The number of channels remains the same.

Args:
output_type (NumPy datatype, optional): The datatype of the NumPy output (default=np.float32).