From 82b4d74ef59cb5ad8d64c7cbd89f3de6a35bd314 Mon Sep 17 00:00:00 2001
From: Xiao Tianci
Date: Wed, 27 Apr 2022 10:59:22 +0800
Subject: [PATCH] fix example code and docs

---
 .jenkins/check/config/filter_pylint.txt       |  1 +
 .../dataset/mindspore.dataset.Dataset.rst     |  9 ++++---
 .../dataset/liteapi/include/datasets.h        | 26 +++++++++----------
 .../mindspore/dataset/audio/transforms.py     |  6 ++---
 .../mindspore/dataset/engine/datasets.py      | 11 ++++----
 .../mindspore/dataset/vision/py_transforms.py | 20 +++++++++-----
 6 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/.jenkins/check/config/filter_pylint.txt b/.jenkins/check/config/filter_pylint.txt
index 4fa9220446e..412cec87ef0 100644
--- a/.jenkins/check/config/filter_pylint.txt
+++ b/.jenkins/check/config/filter_pylint.txt
@@ -61,6 +61,7 @@
 # MindData
 "mindspore/mindspore/python/mindspore/dataset/__init__.py" "redefined-builtin"
+"mindspore/mindspore/python/mindspore/dataset/audio/transforms.py" "super-init-not-called"
 "mindspore/mindspore/python/mindspore/dataset/engine/__init__.py" "redefined-builtin"
 "mindspore/mindspore/python/mindspore/dataset/engine/datasets.py" "redefined-builtin"
 "mindspore/mindspore/python/mindspore/dataset/engine/datasets.py" "broad-except"
diff --git a/docs/api/api_python/dataset/mindspore.dataset.Dataset.rst b/docs/api/api_python/dataset/mindspore.dataset.Dataset.rst
index ef39b9de5f7..3223ab317f6 100644
--- a/docs/api/api_python/dataset/mindspore.dataset.Dataset.rst
+++ b/docs/api/api_python/dataset/mindspore.dataset.Dataset.rst
@@ -32,10 +32,11 @@
     - **drop_remainder** (bool, optional) - Whether to drop the last batch when it contains fewer entries than `batch_size`, rather than passing it on to the next operation. Default: False, do not drop.
     - **num_parallel_workers** (int, optional) - Number of parallel processes/threads used by the `batch` operation (whether multi-process or multi-thread mode is used is determined by `python_multiprocessing`).
       Default: None, use the number of threads configured in mindspore.dataset.config.
-    - **per_batch_map** (callable, optional) - A callable that takes (list[Tensor], list[Tensor], ..., BatchInfo) as input and returns (list[Tensor], list[Tensor], ...) as the new data columns.
-      Each list[Tensor] in the input represents a batch of Tensors for a given column, and the number of list[Tensor] should match the number of column names passed in `input_columns`.
-      In the returned (list[Tensor], list[Tensor], ...), the number of list[Tensor] should be the same as in the input; if the number of output columns differs from the number of input columns, `output_columns` must be specified.
-      The last input parameter of the callable is always BatchInfo, which is used to obtain information about the dataset; see example (2) for usage.
+    - **per_batch_map** (callable, optional) - A callable that takes (list[numpy.ndarray], list[numpy.ndarray], ..., BatchInfo) as input
+      and returns (list[numpy.ndarray], list[numpy.ndarray], ...) as the new data columns. Each list[numpy.ndarray] in the input represents a batch of numpy.ndarray for a given column,
+      and the number of list[numpy.ndarray] should match the number of column names passed in `input_columns`. In the returned (list[numpy.ndarray], list[numpy.ndarray], ...),
+      the number of list[numpy.ndarray] should be the same as in the input; if the number of output columns differs from the number of input columns, `output_columns` must be specified.
+      The last input parameter of the callable is always BatchInfo, which is used to obtain information about the dataset; see example (2) for usage.
     - **input_columns** (Union[str, list[str]], optional) - Columns to be used as input to the `batch` operation.
       If `per_batch_map` is not None, the number of column names in the list should match the number of columns handled by `per_batch_map`. Default: None, not specified.
    - **output_columns** (Union[str, list[str]], optional) - Columns to be output by the `batch` operation. This parameter must be specified if the number of output columns differs from the number of input columns.
diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h
index cb719fa01a6..c07201733e9 100644
--- a/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h
@@ -150,7 +150,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to transfer data through a device.
-  /// \notes If device is Ascend, features of data will be transferred one by one. The limitation
+  /// \note If device is Ascend, features of data will be transferred one by one. The limitation
   ///     of data transmission per time is 256M.
   /// \param[in] queue_name Channel name (default="", create new unique name).
   /// \param[in] device_type Type of device (default="", get from MSContext).
@@ -193,7 +193,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to create a BatchDataset
-  /// \notes Combines batch_size number of consecutive rows into batches
+  /// \note Combines batch_size number of consecutive rows into batches
   /// \param[in] batch_size The number of rows each batch is created with
   /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
   ///     batch. If true, and if there are less than batch_size rows
@@ -209,7 +209,7 @@
   std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
 
   /// \brief Function to create a MapDataset
-  /// \notes Applies each operation in operations to this dataset
+  /// \note Applies each operation in operations to this dataset
   /// \param[in] operations Vector of raw pointers to TensorTransform objects to be applied on the dataset. Operations
   ///     are applied in the order they appear in this list
   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
@@ -274,7 +274,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to create a MapDataset
-  /// \notes Applies each operation in operations to this dataset
+  /// \note Applies each operation in operations to this dataset
   /// \param[in] operations Vector of shared pointers to TensorTransform objects to be applied on the dataset.
   ///     Operations are applied in the order they appear in this list
   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
@@ -306,7 +306,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to create a MapDataset
-  /// \notes Applies each operation in operations to this dataset
+  /// \note Applies each operation in operations to this dataset
   /// \param[in] operations Vector of TensorTransform objects to be applied on the dataset. Operations are applied in
   ///     the order they appear in this list
   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
@@ -336,7 +336,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to create a Project Dataset
-  /// \notes Applies project to the dataset
+  /// \note Applies project to the dataset
   /// \param[in] columns The name of columns to project
   /// \return Shared pointer to the current Dataset
   /// \par Example
@@ -350,7 +350,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to create a Shuffle Dataset
-  /// \notes Randomly shuffles the rows of this dataset
+  /// \note Randomly shuffles the rows of this dataset
   /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling
   /// \return Shared pointer to the current ShuffleDataset
   /// \par Example
@@ -576,7 +576,7 @@ class MS_API AlbumDataset : public Dataset {
 };
 
 /// \brief Function to create an AlbumDataset
-/// \notes The generated dataset is specified through setting a schema
+/// \note The generated dataset is specified through setting a schema
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] data_schema Path to dataset schema file
 /// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns.
@@ -611,7 +611,7 @@ Album(const std::string &dataset_dir, const std::string &data_schema, const std:
 }
 
 /// \brief Function to create an AlbumDataset
-/// \notes The generated dataset is specified through setting a schema
+/// \note The generated dataset is specified through setting a schema
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] data_schema Path to dataset schema file
 /// \param[in] column_names Column names used to specify columns to load
@@ -628,7 +628,7 @@ inline std::shared_ptr<AlbumDataset> MS_API Album(const std::string &dataset_dir
 }
 
 /// \brief Function to create an AlbumDataset
-/// \notes The generated dataset is specified through setting a schema
+/// \note The generated dataset is specified through setting a schema
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] data_schema Path to dataset schema file
 /// \param[in] column_names Column names used to specify columns to load
@@ -676,7 +676,7 @@ class MS_API MnistDataset : public Dataset {
 };
 
 /// \brief Function to create a MnistDataset
-/// \notes The generated dataset has two columns ["image", "label"]
+/// \note The generated dataset has two columns ["image", "label"]
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all").
 /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
@@ -705,7 +705,7 @@ Mnist(const std::string &dataset_dir, const std::string &usage = "all",
 }
 
 /// \brief Function to create a MnistDataset
-/// \notes The generated dataset has two columns ["image", "label"]
+/// \note The generated dataset has two columns ["image", "label"]
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] usage of MNIST, can be "train", "test" or "all"
 /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
@@ -718,7 +718,7 @@ inline std::shared_ptr<MnistDataset> MS_API Mnist(const std::string &dataset_dir
 }
 
 /// \brief Function to create a MnistDataset
-/// \notes The generated dataset has two columns ["image", "label"]
+/// \note The generated dataset has two columns ["image", "label"]
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] usage of MNIST, can be "train", "test" or "all"
 /// \param[in] sampler Sampler object used to choose samples from the dataset.
diff --git a/mindspore/python/mindspore/dataset/audio/transforms.py b/mindspore/python/mindspore/dataset/audio/transforms.py
index 54b826cf2b7..8e394f5379f 100644
--- a/mindspore/python/mindspore/dataset/audio/transforms.py
+++ b/mindspore/python/mindspore/dataset/audio/transforms.py
@@ -1409,9 +1409,9 @@ class PhaseVocoder(AudioTensorOperation):
     Examples:
         >>> import numpy as np
         >>>
-        >>> waveform = np.random.randn(2, 44, 10, 2)
+        >>> waveform = np.random.random([2, 44, 10, 2])
         >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
-        >>> phase_advance = np.random.randn(44, 1)
+        >>> phase_advance = np.random.random([44, 1])
         >>> transforms = [audio.PhaseVocoder(rate=2, phase_advance=phase_advance)]
         >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
@@ -1661,7 +1661,7 @@ class TimeStretch(AudioTensorOperation):
     Examples:
         >>> import numpy as np
         >>>
-        >>> waveform = np.random.random([1, 30])
+        >>> waveform = np.random.random([44, 10, 2])
         >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
         >>> transforms = [audio.TimeStretch()]
         >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
diff --git a/mindspore/python/mindspore/dataset/engine/datasets.py b/mindspore/python/mindspore/dataset/engine/datasets.py
index 118681eadd7..28fbed879ee 100644
--- a/mindspore/python/mindspore/dataset/engine/datasets.py
+++ b/mindspore/python/mindspore/dataset/engine/datasets.py
@@ -574,11 +574,12 @@ class Dataset:
             num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
                 (default=None).
             per_batch_map (callable, optional): Per batch map callable (default=None). A callable which takes
-                (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch
-                of Tensors on a given column. The number of lists should match with the number of entries in
-                input_columns. The last parameter of the callable should always be a BatchInfo object. Per_batch_map
-                should return (list[Tensor], list[Tensor], ...). The length of each list in output should be the same as
-                the input. output_columns is required if the number of output lists is different from input.
+                (list[numpy.ndarray], list[numpy.ndarray], ..., BatchInfo) as input parameters. Each
+                list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists should
+                match with the number of entries in input_columns. The last parameter of the callable should always be
+                a BatchInfo object. Per_batch_map should return (list[numpy.ndarray], list[numpy.ndarray], ...). The
+                length of each list in output should be the same as the input. output_columns is required if the number
+                of output lists is different from input.
             input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of the list
                 should match with signature of per_batch_map callable (default=None).
             output_columns (Union[str, list[str]], optional): List of names assigned to the columns
diff --git a/mindspore/python/mindspore/dataset/vision/py_transforms.py b/mindspore/python/mindspore/dataset/vision/py_transforms.py
index 7c0ab61e35f..7a81dcebccc 100644
--- a/mindspore/python/mindspore/dataset/vision/py_transforms.py
+++ b/mindspore/python/mindspore/dataset/vision/py_transforms.py
@@ -488,7 +488,7 @@ class HWC2CHW(py_transforms.PyTensorOperation):
     Examples:
         >>> from mindspore.dataset.transforms.py_transforms import Compose
         >>>
-        >>> transforms_list = Compose([py_vision.Decode(),
+        >>> transforms_list = Compose([c_vision.Decode(),
         ...                            py_vision.HWC2CHW()])
         >>> # apply the transform to dataset through map function
         >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
@@ -627,11 +627,19 @@ class MixUp(py_transforms.PyTensorOperation):
         ``CPU``
 
     Examples:
-        >>> # Setup multi-batch mixup transformation
-        >>> transform = [py_vision.MixUp(batch_size=16, alpha=0.2, is_single=False)]
-        >>> # Apply the transform to the dataset through dataset.map()
-        >>> image_folder_dataset = image_folder_dataset.map(input_columns="image",
-        ...                                                 operations=transform)
+        >>> # first decode the image
+        >>> image_folder_dataset = image_folder_dataset.map(operations=c_vision.Decode(),
+        ...                                                 input_columns="image")
+        >>> # then one-hot encode the label
+        >>> image_folder_dataset = image_folder_dataset.map(operations=c_transforms.OneHot(10),
+        ...                                                 input_columns="label")
+        >>> # batch the samples
+        >>> batch_size = 4
+        >>> image_folder_dataset = image_folder_dataset.batch(batch_size=batch_size)
+        >>> # finally mix up the images and labels
+        >>> image_folder_dataset = image_folder_dataset.map(
+        ...     operations=py_vision.MixUp(batch_size=batch_size, alpha=0.2),
+        ...     input_columns=["image", "label"])
     """
 
     @check_mix_up
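
For reference, the per_batch_map contract documented above can be exercised with a minimal, self-contained sketch like the following. It assumes a MindSpore build from this era, where Dataset.batch() accepts per_batch_map; the column name "data" and the helper name double_batch are illustrative only and are not part of this patch:

    import numpy as np
    import mindspore.dataset as ds

    def double_batch(col, batch_info):
        # `col` is a list[numpy.ndarray]: one ndarray per row in the batch.
        # BatchInfo is always the last parameter of a per_batch_map callable.
        print("batch number:", batch_info.get_batch_num())
        # Return a tuple of list[numpy.ndarray], one list per output column.
        return ([2 * x for x in col],)

    data = [np.array([i], dtype=np.float32) for i in range(8)]
    dataset = ds.NumpySlicesDataset(data, column_names=["data"], shuffle=False)
    dataset = dataset.batch(batch_size=4,
                            per_batch_map=double_batch,
                            input_columns=["data"])

    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(row["data"])  # two (4, 1) float32 batches with doubled values

Each input column arrives as a list[numpy.ndarray] with one entry per row, and the callable returns a tuple of such lists, which is exactly the type change this patch documents (numpy.ndarray rather than Tensor).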