From cd931fdb89ec4fe44e8592cb8f28cbcf1ef397e7 Mon Sep 17 00:00:00 2001 From: luoyang Date: Fri, 14 Jan 2022 17:12:31 +0800 Subject: [PATCH] fix minddata doc --- .../dataset/engine/datasets_audio.py | 8 ++-- .../engine/datasets_standard_format.py | 5 +- .../mindspore/dataset/engine/datasets_text.py | 4 +- .../dataset/engine/datasets_user_defined.py | 5 +- .../dataset/engine/datasets_vision.py | 46 +++++++++---------- 5 files changed, 35 insertions(+), 33 deletions(-) diff --git a/mindspore/python/mindspore/dataset/engine/datasets_audio.py b/mindspore/python/mindspore/dataset/engine/datasets_audio.py index 54d4eb72e05..5b07a0ece76 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets_audio.py +++ b/mindspore/python/mindspore/dataset/engine/datasets_audio.py @@ -34,7 +34,7 @@ from ..core.validator_helpers import replace_none class LJSpeechDataset(MappableDataset, AudioBaseDataset): """ - A source dataset for reading and parsing LJSpeech dataset. + A source dataset that reads and parses LJSpeech dataset. The generated dataset has four columns :py:obj:`[waveform, sample_rate, transcription, normalized_transcript]`. The tensor of column :py:obj:`waveform` is a tensor of the float32 type. @@ -170,7 +170,7 @@ class LJSpeechDataset(MappableDataset, AudioBaseDataset): class SpeechCommandsDataset(MappableDataset, AudioBaseDataset): """ - A source dataset for reading and parsing the SpeechCommands dataset. + A source dataset that reads and parses the SpeechCommands dataset. The generated dataset has five columns :py:obj:`[waveform, sample_rate, label, speaker_id, utterance_number]`. The tensor of column :py:obj:`waveform` is a vector of the float32 type. @@ -295,7 +295,7 @@ class SpeechCommandsDataset(MappableDataset, AudioBaseDataset): class TedliumDataset(MappableDataset, AudioBaseDataset): """ - A source dataset for reading and parsing Tedlium dataset. + A source dataset that reads and parses Tedlium dataset. 
The columns of generated dataset depend on the source SPH files and the corresponding STM files. The generated dataset has six columns :py:obj:`[waveform, sample_rate, transcript, talk_id, speaker_id, @@ -507,7 +507,7 @@ class TedliumDataset(MappableDataset, AudioBaseDataset): class YesNoDataset(MappableDataset, AudioBaseDataset): """ - A source dataset for reading and parsing the YesNo dataset. + A source dataset that reads and parses the YesNo dataset. The generated dataset has three columns :py:obj:`[waveform, sample_rate, labels]`. The tensor of column :py:obj:`waveform` is a vector of the float32 type. diff --git a/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py b/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py index 1a8a61f8f33..00dbbdbe6c9 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py +++ b/mindspore/python/mindspore/dataset/engine/datasets_standard_format.py @@ -38,6 +38,7 @@ class CSVDataset(SourceDataset, UnionBaseDataset): """ A source dataset that reads and parses comma-separated values `(CSV) `_ files as dataset. + The columns of generated dataset depend on the source CSV files. Args: @@ -101,7 +102,7 @@ class CSVDataset(SourceDataset, UnionBaseDataset): class MindDataset(MappableDataset, UnionBaseDataset): """ - A source dataset for reading and parsing MindRecord dataset. + A source dataset that reads and parses MindRecord dataset. The columns of generated dataset depend on the source MindRecord files. @@ -230,7 +231,7 @@ class MindDataset(MappableDataset, UnionBaseDataset): class TFRecordDataset(SourceDataset, UnionBaseDataset): """ - A source dataset for reading and parsing datasets stored on disk in TFData format. + A source dataset that reads and parses datasets stored on disk in TFData format. The columns of generated dataset depend on the source TFRecord files. 
diff --git a/mindspore/python/mindspore/dataset/engine/datasets_text.py b/mindspore/python/mindspore/dataset/engine/datasets_text.py index d4951afd8da..6508b44eace 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets_text.py +++ b/mindspore/python/mindspore/dataset/engine/datasets_text.py @@ -191,7 +191,7 @@ class AmazonReviewDataset(SourceDataset, TextBaseDataset): ├── test.csv └── readme.txt - Citation: + Citation: .. code-block:: @@ -600,7 +600,7 @@ class EnWik9Dataset(SourceDataset, TextBaseDataset): class IMDBDataset(MappableDataset, TextBaseDataset): """ - A source dataset for reading and parsing Internet Movie Database (IMDb). + A source dataset that reads and parses Internet Movie Database (IMDb). The generated dataset has two columns: :py:obj:`[text, label]`. The tensor of column :py:obj:`text` is of the string type. diff --git a/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py b/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py index 2ed50dd05ee..88de11ad83f 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py +++ b/mindspore/python/mindspore/dataset/engine/datasets_user_defined.py @@ -876,8 +876,9 @@ class _PaddedDataset: class PaddedDataset(GeneratorDataset): """ - Creates a dataset with filler data provided by user. Mainly used to add to the original dataset - and assign it to the corresponding shard. + Creates a dataset with filler data provided by user. + + Mainly used to add to the original dataset and assign it to the corresponding shard. Args: padded_samples (list(dict)): Samples provided by user. 
diff --git a/mindspore/python/mindspore/dataset/engine/datasets_vision.py b/mindspore/python/mindspore/dataset/engine/datasets_vision.py index bfedf61b258..c0eccee4afb 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets_vision.py +++ b/mindspore/python/mindspore/dataset/engine/datasets_vision.py @@ -401,7 +401,7 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset): class CelebADataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing CelebA dataset. + A source dataset that reads and parses CelebA dataset. Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset. The generated dataset has two columns: :py:obj:`[image, attr]`. @@ -565,7 +565,7 @@ class CelebADataset(MappableDataset, VisionBaseDataset): class Cifar10Dataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing Cifar10 dataset. + A source dataset that reads and parses Cifar10 dataset. This api only supports parsing Cifar10 file in binary version now. The generated dataset has two columns :py:obj:`[image, label]`. @@ -696,7 +696,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset): class Cifar100Dataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing Cifar100 dataset. + A source dataset that reads and parses Cifar100 dataset. The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -820,7 +820,7 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset): class CityscapesDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing Cityscapes dataset. + A source dataset that reads and parses Cityscapes dataset. The generated dataset has two columns :py:obj:`[image, task]`. The tensor of column :py:obj:`image` is of the uint8 type. 
@@ -990,7 +990,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset): class CocoDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing COCO dataset. + A source dataset that reads and parses COCO dataset. CocoDataset supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation and Panoptic Segmentation of 2017 Train/Val/Test dataset. @@ -1201,7 +1201,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset): class DIV2KDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing DIV2KDataset dataset. + A source dataset that reads and parses the DIV2K dataset. The generated dataset has two columns :py:obj:`[hr_image, lr_image]`. The tensor of column :py:obj:`hr_image` is of the uint8 type. @@ -1388,7 +1388,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset): class EMnistDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the EMNIST dataset. + A source dataset that reads and parses the EMNIST dataset. The generated dataset has two columns :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -1609,7 +1609,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset): class FashionMnistDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the FASHION-MNIST dataset. + A source dataset that reads and parses the FASHION-MNIST dataset. The generated dataset has two columns :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -1730,7 +1730,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset): class FlickrDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing Flickr8k and Flickr30k dataset. + A source dataset that reads and parses Flickr8k and Flickr30k dataset. The generated dataset has two columns :py:obj:`[image, annotation]`.
The tensor of column :py:obj:`image` is of the uint8 type. @@ -1973,7 +1973,7 @@ class _Flowers102Dataset: class Flowers102Dataset(GeneratorDataset): """ - A source dataset for reading and parsing Flowers102 dataset. + A source dataset that reads and parses Flowers102 dataset. The generated dataset has two columns :py:obj:`[image, label]` or three :py:obj:`[image, segmentation, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -2266,7 +2266,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset): class KMnistDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the KMNIST dataset. + A source dataset that reads and parses the KMNIST dataset. The generated dataset has two columns :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -2506,7 +2506,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset): class MnistDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the MNIST dataset. + A source dataset that reads and parses the MNIST dataset. The generated dataset has two columns :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -2626,7 +2626,7 @@ class MnistDataset(MappableDataset, VisionBaseDataset): class PhotoTourDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the PhotoTour dataset. + A source dataset that reads and parses the PhotoTour dataset. The generated dataset with different usage has different output columns. If train, the generated dataset has one column :py:obj:`[image]`, @@ -2780,7 +2780,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset): class Places365Dataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the Places365 dataset. + A source dataset that reads and parses the Places365 dataset. The generated dataset has two columns :py:obj:`[image, label]`. 
The tensor of column :py:obj:`image` is of the uint8 type. @@ -2922,7 +2922,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset): class QMnistDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the QMNIST dataset. + A source dataset that reads and parses the QMNIST dataset. The generated dataset has two columns :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -3148,7 +3148,7 @@ class _SBDataset: class SBDataset(GeneratorDataset): """ - A source dataset for reading and parsing Semantic Boundaries Dataset. + A source dataset that reads and parses Semantic Boundaries Dataset. The generated dataset has two columns: :py:obj:`[image, task]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -3280,7 +3280,7 @@ class SBDataset(GeneratorDataset): class SBUDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the SBU dataset. + A source dataset that reads and parses the SBU dataset. The generated dataset has two columns :py:obj:`[image, caption]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -3394,7 +3394,7 @@ class SBUDataset(MappableDataset, VisionBaseDataset): class SemeionDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing Semeion dataset. + A source dataset that reads and parses Semeion dataset. The generated dataset has two columns :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -3514,7 +3514,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset): class STL10Dataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing STL10 dataset. + A source dataset that reads and parses STL10 dataset. The generated dataset has two columns: :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. 
@@ -3686,7 +3686,7 @@ class _SVHNDataset: class SVHNDataset(GeneratorDataset): """ - A source dataset for reading and parsing SVHN dataset. + A source dataset that reads and parses SVHN dataset. The generated dataset has two columns: :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -3798,7 +3798,7 @@ class SVHNDataset(GeneratorDataset): class USPSDataset(SourceDataset, VisionBaseDataset): """ - A source dataset for reading and parsing the USPS dataset. + A source dataset that reads and parses the USPS dataset. The generated dataset has two columns: :py:obj:`[image, label]`. The tensor of column :py:obj:`image` is of the uint8 type. @@ -3896,7 +3896,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset): class VOCDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing VOC dataset. + A source dataset that reads and parses VOC dataset. The generated dataset with different task setting has different output columns: @@ -4098,7 +4098,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset): class WIDERFaceDataset(MappableDataset, VisionBaseDataset): """ - A source dataset for reading and parsing WIDERFace dataset. + A source dataset that reads and parses WIDERFace dataset. When usage is "train", "valid" or "all", the generated dataset has eight columns ["image", "bbox", "blur", "expression", "illumination", "occlusion", "pose", "invalid"]. When usage is "test", it only has one column