forked from mindspore-Ecosystem/mindspore
fix example code and docs
This commit is contained in:
parent 5b4e912625
commit 82b4d74ef5
@@ -61,6 +61,7 @@
 # MindData
 "mindspore/mindspore/python/mindspore/dataset/__init__.py" "redefined-builtin"
 "mindspore/mindspore/python/mindspore/dataset/audio/transforms.py" "super-init-not-called"
 "mindspore/mindspore/python/mindspore/dataset/engine/__init__.py" "redefined-builtin"
 "mindspore/mindspore/python/mindspore/dataset/engine/datasets.py" "redefined-builtin"
+"mindspore/mindspore/python/mindspore/dataset/engine/datasets.py" "broad-except"
@@ -32,10 +32,11 @@
 - **drop_remainder** (bool, optional) - Whether to drop the last batch when it contains fewer than `batch_size` rows, rather than passing it on to the next operation. Default: False, do not drop.
 - **num_parallel_workers** (int, optional) - Number of parallel processes/threads for the `batch` operation (the parameter `python_multiprocessing` determines whether multiprocessing or multithreading is used).
   Default: None, use the number of threads configured in mindspore.dataset.config.
-- **per_batch_map** (callable, optional) - A callable that takes (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters and returns (list[Tensor], list[Tensor], ...) as the new data columns.
-  Each list[Tensor] in the input represents a batch of Tensors for a given column; the number of list[Tensor] should match the number of column names passed in `input_columns`.
-  In the returned (list[Tensor], list[Tensor], ...), the number of list[Tensor] should match the input; if the number of output columns differs from the number of input columns, `output_columns` must be specified.
-  The last input parameter of the callable is always BatchInfo, which is used to get information about the dataset; see example (2) for usage.
+- **per_batch_map** (callable, optional) - A callable that takes (list[numpy.ndarray], list[numpy.ndarray], ..., BatchInfo) as input parameters
+  and returns (list[numpy.ndarray], list[numpy.ndarray], ...) as the new data columns. Each list[numpy.ndarray] in the input represents a batch of numpy.ndarray for a given column;
+  the number of list[numpy.ndarray] should match the number of column names passed in `input_columns`. In the returned (list[numpy.ndarray], list[numpy.ndarray], ...),
+  the number of list[numpy.ndarray] should match the input; if the number of output columns differs from the number of input columns, `output_columns` must be specified.
+  The last input parameter of the callable is always BatchInfo, which is used to get information about the dataset; see example (2) for usage.
 - **input_columns** (Union[str, list[str]], optional) - The input data columns for the `batch` operation.
   If `per_batch_map` is not None, the number of column names in the list should match the number of columns handled by `per_batch_map`. Default: None, not specified.
 - **output_columns** (Union[str, list[str]], optional) - The output data columns of the `batch` operation. This parameter must be specified if the lengths of the output and input column lists are not equal.
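To make the new numpy.ndarray contract concrete, here is a minimal sketch of a `per_batch_map` that uses the trailing BatchInfo argument. The column name `col1` and the scaling logic are illustrative assumptions, not part of the commit:

```python
import numpy as np
import mindspore.dataset as ds

# Illustrative per_batch_map: each positional argument is a list of
# numpy.ndarray (one entry per row in the batch); the final argument
# is always a BatchInfo object.
def scale_by_batch(col1, batch_info):
    factor = batch_info.get_batch_num() + 1  # get_batch_num() is 0-based
    return ([arr * factor for arr in col1],)  # one output list -> one output column

data = np.arange(12, dtype=np.float32).reshape(6, 2)
dataset = ds.NumpySlicesDataset(data=data, column_names=["col1"], shuffle=False)
dataset = dataset.batch(batch_size=2, per_batch_map=scale_by_batch,
                        input_columns=["col1"])
```

Because the callable returns a single list, the output column count matches the input and no `output_columns` argument is needed here.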
@@ -150,7 +150,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }

   /// \brief Function to transfer data through a device.
-  /// \notes If device is Ascend, features of data will be transferred one by one. The limitation
+  /// \note If device is Ascend, features of data will be transferred one by one. The limitation
   ///     of data transmission per time is 256M.
   /// \param[in] queue_name Channel name (default="", create new unique name).
   /// \param[in] device_type Type of device (default="", get from MSContext).
@@ -193,7 +193,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }

   /// \brief Function to create a BatchDataset
-  /// \notes Combines batch_size number of consecutive rows into batches
+  /// \note Combines batch_size number of consecutive rows into batches
   /// \param[in] batch_size The number of rows each batch is created with
   /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
   ///     batch. If true, and if there are less than batch_size rows
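The drop_remainder semantics documented here can be checked quickly from the Python API, which mirrors this C++ signature. A small sketch; the dataset contents are made up for illustration:

```python
import numpy as np
import mindspore.dataset as ds

data = np.arange(10, dtype=np.int32)  # 10 rows
dataset = ds.NumpySlicesDataset(data=data, column_names=["col1"], shuffle=False)
# With batch_size=3 there are 3 full batches; the 1-row remainder is dropped.
dataset = dataset.batch(batch_size=3, drop_remainder=True)
assert dataset.get_dataset_size() == 3
```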
@@ -209,7 +209,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);

   /// \brief Function to create a MapDataset
-  /// \notes Applies each operation in operations to this dataset
+  /// \note Applies each operation in operations to this dataset
   /// \param[in] operations Vector of raw pointers to TensorTransform objects to be applied on the dataset. Operations
   ///     are applied in the order they appear in this list
   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
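As a reminder of the in-order semantics ("operations are applied in the order they appear in this list"), a hedged Python sketch; the folder path is a placeholder:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

# "/path/to/image_folder" is a placeholder directory of images.
image_folder_dataset = ds.ImageFolderDataset("/path/to/image_folder")
# Decode runs first, then Resize operates on the already-decoded image.
transforms = [c_vision.Decode(), c_vision.Resize([224, 224])]
image_folder_dataset = image_folder_dataset.map(operations=transforms,
                                                input_columns=["image"])
```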
@@ -274,7 +274,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }

   /// \brief Function to create a MapDataset
-  /// \notes Applies each operation in operations to this dataset
+  /// \note Applies each operation in operations to this dataset
   /// \param[in] operations Vector of shared pointers to TensorTransform objects to be applied on the dataset.
   ///     Operations are applied in the order they appear in this list
   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
@@ -306,7 +306,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }

   /// \brief Function to create a MapDataset
-  /// \notes Applies each operation in operations to this dataset
+  /// \note Applies each operation in operations to this dataset
   /// \param[in] operations Vector of TensorTransform objects to be applied on the dataset. Operations are applied in
   ///     the order they appear in this list
   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
@@ -336,7 +336,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }

   /// \brief Function to create a Project Dataset
-  /// \notes Applies project to the dataset
+  /// \note Applies project to the dataset
   /// \param[in] columns The name of columns to project
   /// \return Shared pointer to the current Dataset
   /// \par Example
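The `\par Example` block is truncated in this hunk; for orientation, a minimal Python sketch of the same project semantics. The column names are illustrative:

```python
import numpy as np
import mindspore.dataset as ds

data = {"image": np.zeros((4, 2), np.float32), "label": np.arange(4, dtype=np.int32)}
dataset = ds.NumpySlicesDataset(data=data, shuffle=False)
# project keeps only the named columns ("label" is dropped here)
dataset = dataset.project(columns=["image"])
```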
@@ -350,7 +350,7 @@ class MS_API Dataset : public std::enable_shared_from_this<Dataset> {
   }

   /// \brief Function to create a Shuffle Dataset
-  /// \notes Randomly shuffles the rows of this dataset
+  /// \note Randomly shuffles the rows of this dataset
   /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling
   /// \return Shared pointer to the current ShuffleDataset
   /// \par Example
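A hedged Python sketch of the buffer_size parameter described above; the dataset size of 100 is an arbitrary choice:

```python
import numpy as np
import mindspore.dataset as ds

dataset = ds.NumpySlicesDataset(np.arange(100, dtype=np.int32), column_names=["col1"])
# buffer_size must be larger than 1; a buffer as large as the dataset gives
# a full global shuffle, while smaller buffers shuffle more locally.
dataset = dataset.shuffle(buffer_size=100)
```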
@@ -576,7 +576,7 @@ class MS_API AlbumDataset : public Dataset {
 };

 /// \brief Function to create an AlbumDataset
-/// \notes The generated dataset is specified through setting a schema
+/// \note The generated dataset is specified through setting a schema
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] data_schema Path to dataset schema file
 /// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns.
@@ -611,7 +611,7 @@ Album(const std::string &dataset_dir, const std::string &data_schema, const std:
 }

 /// \brief Function to create an AlbumDataset
-/// \notes The generated dataset is specified through setting a schema
+/// \note The generated dataset is specified through setting a schema
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] data_schema Path to dataset schema file
 /// \param[in] column_names Column names used to specify columns to load
@@ -628,7 +628,7 @@ inline std::shared_ptr<AlbumDataset> MS_API Album(const std::string &dataset_dir
 }

 /// \brief Function to create an AlbumDataset
-/// \notes The generated dataset is specified through setting a schema
+/// \note The generated dataset is specified through setting a schema
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] data_schema Path to dataset schema file
 /// \param[in] column_names Column names used to specify columns to load
@@ -676,7 +676,7 @@ class MS_API MnistDataset : public Dataset {
 };

 /// \brief Function to create a MnistDataset
-/// \notes The generated dataset has two columns ["image", "label"]
+/// \note The generated dataset has two columns ["image", "label"]
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all").
 /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
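The Python API exposes the same dataset_dir/usage/sampler trio; a hedged sketch, where "/path/to/mnist" is a placeholder for a directory containing the raw MNIST files:

```python
import mindspore.dataset as ds

# usage selects the split: "train", "test" or "all"
mnist_train = ds.MnistDataset(dataset_dir="/path/to/mnist", usage="train")
# Passing an explicit sampler replaces the default sampling behaviour.
mnist_first_1k = ds.MnistDataset(dataset_dir="/path/to/mnist",
                                 sampler=ds.SequentialSampler(num_samples=1000))
```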
@@ -705,7 +705,7 @@ Mnist(const std::string &dataset_dir, const std::string &usage = "all",
 }

 /// \brief Function to create a MnistDataset
-/// \notes The generated dataset has two columns ["image", "label"]
+/// \note The generated dataset has two columns ["image", "label"]
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] usage of MNIST, can be "train", "test" or "all"
 /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
@@ -718,7 +718,7 @@ inline std::shared_ptr<MnistDataset> MS_API Mnist(const std::string &dataset_dir
 }

 /// \brief Function to create a MnistDataset
-/// \notes The generated dataset has two columns ["image", "label"]
+/// \note The generated dataset has two columns ["image", "label"]
 /// \param[in] dataset_dir Path to the root directory that contains the dataset
 /// \param[in] usage of MNIST, can be "train", "test" or "all"
 /// \param[in] sampler Sampler object used to choose samples from the dataset.
@@ -1409,9 +1409,9 @@ class PhaseVocoder(AudioTensorOperation):
     Examples:
         >>> import numpy as np
         >>>
-        >>> waveform = np.random.randn(2, 44, 10, 2)
+        >>> waveform = np.random.random([2, 44, 10, 2])
         >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
-        >>> phase_advance = np.random.randn(44, 1)
+        >>> phase_advance = np.random.random([44, 1])
         >>> transforms = [audio.PhaseVocoder(rate=2, phase_advance=phase_advance)]
         >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
     """
@@ -1661,7 +1661,7 @@ class TimeStretch(AudioTensorOperation):
     Examples:
         >>> import numpy as np
         >>>
-        >>> waveform = np.random.random([1, 30])
+        >>> waveform = np.random.random([44, 10, 2])
         >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
         >>> transforms = [audio.TimeStretch()]
         >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
@@ -574,11 +574,12 @@ class Dataset:
            num_parallel_workers (int, optional): Number of workers(threads) to process the dataset in parallel
                (default=None).
            per_batch_map (callable, optional): Per batch map callable (default=None). A callable which takes
-                (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch
-                of Tensors on a given column. The number of lists should match with the number of entries in
-                input_columns. The last parameter of the callable should always be a BatchInfo object. Per_batch_map
-                should return (list[Tensor], list[Tensor], ...). The length of each list in output should be the same as
-                the input. output_columns is required if the number of output lists is different from input.
+                (list[numpy.ndarray], list[numpy.ndarray], ..., BatchInfo) as input parameters. Each
+                list[numpy.ndarray] represents a batch of numpy.ndarray on a given column. The number of lists should
+                match with the number of entries in input_columns. The last parameter of the callable should always be
+                a BatchInfo object. Per_batch_map should return (list[numpy.ndarray], list[numpy.ndarray], ...). The
+                length of each list in output should be the same as the input. output_columns is required if the number
+                of output lists is different from input.
            input_columns (Union[str, list[str]], optional): List of names of the input columns. The size of the list
                should match with signature of per_batch_map callable (default=None).
            output_columns (Union[str, list[str]], optional): List of names assigned to the columns
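The "output_columns is required if the number of output lists is different from input" rule is easiest to see in code. A hedged sketch; the column names and the sign-splitting callable are invented for illustration:

```python
import numpy as np
import mindspore.dataset as ds

# Returning two lists from one input list changes the column count,
# so output_columns must name both resulting columns.
def split_sign(col1, batch_info):
    return ([arr for arr in col1], [-arr for arr in col1])

dataset = ds.NumpySlicesDataset(np.arange(8, dtype=np.float32),
                                column_names=["col1"], shuffle=False)
dataset = dataset.batch(batch_size=4, per_batch_map=split_sign,
                        input_columns=["col1"],
                        output_columns=["col1", "neg_col1"])
```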
@@ -488,7 +488,7 @@ class HWC2CHW(py_transforms.PyTensorOperation):
     Examples:
         >>> from mindspore.dataset.transforms.py_transforms import Compose
         >>>
-        >>> transforms_list = Compose([py_vision.Decode(),
+        >>> transforms_list = Compose([c_vision.Decode(),
         ...                            py_vision.HWC2CHW()])
         >>> # apply the transform to dataset through map function
         >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
@@ -627,11 +627,19 @@ class MixUp(py_transforms.PyTensorOperation):
        ``CPU``

    Examples:
-        >>> # Setup multi-batch mixup transformation
-        >>> transform = [py_vision.MixUp(batch_size=16, alpha=0.2, is_single=False)]
-        >>> # Apply the transform to the dataset through dataset.map()
-        >>> image_folder_dataset = image_folder_dataset.map(input_columns="image",
-        ...                                                 operations=transform)
+        >>> # first decode the image
+        >>> image_folder_dataset = image_folder_dataset.map(operations=c_vision.Decode(),
+        ...                                                 input_columns="image")
+        >>> # then one-hot encode the label
+        >>> image_folder_dataset = image_folder_dataset.map(operations=c_transforms.OneHot(10),
+        ...                                                 input_columns="label")
+        >>> # batch the samples
+        >>> batch_size = 4
+        >>> image_folder_dataset = image_folder_dataset.batch(batch_size=batch_size)
+        >>> # finally mix up the images and labels
+        >>> image_folder_dataset = image_folder_dataset.map(
+        ...     operations=py_vision.MixUp(batch_size=batch_size, alpha=0.2),
+        ...     input_columns=["image", "label"])
    """

    @check_mix_up