forked from mindspore-Ecosystem/mindspore
!12291 fix some wrong descriptions of the API docs
From: @tiancixiao Reviewed-by: @pandoublefeng,@liucunwei Signed-off-by: @pandoublefeng,@liucunwei
This commit is contained in:
commit
2e9a52fc5c
|
@ -1732,10 +1732,7 @@ class MappableDataset(SourceDataset):
|
|||
new_sampler (Sampler): The sampler to use for the current dataset.
|
||||
|
||||
Examples:
|
||||
>>> # Note: A SequentialSampler is created by default
|
||||
>>> dataset = ds.ImageFolderDataset(image_folder_dataset_dir)
|
||||
>>>
|
||||
>>> # Use a DistributedSampler instead of the SequentialSampler
|
||||
>>> # use a DistributedSampler instead
|
||||
>>> new_sampler = ds.DistributedSampler(10, 2)
|
||||
>>> dataset.use_sampler(new_sampler)
|
||||
"""
|
||||
|
@ -2888,15 +2885,15 @@ class MnistDataset(MappableDataset):
|
|||
|
||||
The generated dataset has two columns ['image', 'label'].
|
||||
The type of the image tensor is uint8. The label is a scalar uint32 tensor.
|
||||
This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
|
||||
This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
|
||||
below shows what input arguments are allowed and their expected behavior.
|
||||
|
||||
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
||||
:widths: 25 25 50
|
||||
:header-rows: 1
|
||||
|
||||
* - Parameter 'sampler'
|
||||
- Parameter 'shuffle'
|
||||
* - Parameter `sampler`
|
||||
- Parameter `shuffle`
|
||||
- Expected Order Behavior
|
||||
* - None
|
||||
- None
|
||||
|
@ -2937,19 +2934,19 @@ class MnistDataset(MappableDataset):
|
|||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
usage (str, optional): Usage of this dataset, can be "train", "test" or "all" . "train" will read from 60,000
|
||||
train samples, "test" will read from 10,000 test samples, "all" will read from all 70,000 samples.
|
||||
(default=None, all samples)
|
||||
(default=None, will read all samples)
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
(default=None, will read all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
(default=None, set in the config).
|
||||
(default=None, will use value set in the config).
|
||||
shuffle (bool, optional): Whether or not to perform shuffle on the dataset
|
||||
(default=None, expected order behavior shown in the table).
|
||||
sampler (Sampler, optional): Object used to choose samples from the
|
||||
dataset (default=None, expected order behavior shown in the table).
|
||||
num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
|
||||
When this argument is specified, 'num_samples' reflects the max sample number of per shard.
|
||||
shard_id (int, optional): The shard ID within num_shards (default=None). This
|
||||
argument can only be specified when num_shards is also specified.
|
||||
When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
|
||||
shard_id (int, optional): The shard ID within `num_shards` (default=None). This
|
||||
argument can only be specified when `num_shards` is also specified.
|
||||
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
|
||||
(default=None, which means no cache is used).
|
||||
|
||||
|
@ -3587,15 +3584,15 @@ class ManifestDataset(MappableDataset):
|
|||
The shape of the image column is [image_size] if decode flag is False, or [H,W,C]
|
||||
otherwise.
|
||||
The type of the image tensor is uint8. The label is a scalar uint64 tensor.
|
||||
This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
|
||||
This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
|
||||
below shows what input arguments are allowed and their expected behavior.
|
||||
|
||||
.. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
|
||||
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
||||
:widths: 25 25 50
|
||||
:header-rows: 1
|
||||
|
||||
* - Parameter 'sampler'
|
||||
- Parameter 'shuffle'
|
||||
* - Parameter `sampler`
|
||||
- Parameter `shuffle`
|
||||
- Expected Order Behavior
|
||||
* - None
|
||||
- None
|
||||
|
@ -3618,11 +3615,11 @@ class ManifestDataset(MappableDataset):
|
|||
|
||||
Args:
|
||||
dataset_file (str): File to be read.
|
||||
usage (str, optional): acceptable usages include train, eval and inference (default="train").
|
||||
usage (str, optional): Acceptable usages include "train", "eval" and "inference" (default="train").
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, all images).
|
||||
(default=None, will include all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
(default=None, number set in the config).
|
||||
(default=None, will use value set in the config).
|
||||
shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
|
||||
order behavior shown in the table).
|
||||
sampler (Sampler, optional): Object used to choose samples from the
|
||||
|
@ -3632,10 +3629,10 @@ class ManifestDataset(MappableDataset):
|
|||
class will be given a unique index starting from 0).
|
||||
decode (bool, optional): decode the images after reading (default=False).
|
||||
num_shards (int, optional): Number of shards that the dataset will be divided
|
||||
into (default=None). When this argument is specified, 'num_samples' reflects
|
||||
into (default=None). When this argument is specified, `num_samples` reflects
|
||||
the maximum number of samples per shard.
|
||||
shard_id (int, optional): The shard ID within num_shards (default=None). This
|
||||
argument can only be specified when num_shards is also specified.
|
||||
shard_id (int, optional): The shard ID within `num_shards` (default=None). This
|
||||
argument can only be specified when `num_shards` is also specified.
|
||||
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
|
||||
(default=None, which means no cache is used).
|
||||
|
||||
|
@ -4195,7 +4192,8 @@ class CocoDataset(MappableDataset):
|
|||
"""
|
||||
A source dataset for reading and parsing COCO dataset.
|
||||
|
||||
CocoDataset support four kinds of task: 2017 Train/Val/Test Detection, Keypoints, Stuff, Panoptic.
|
||||
`CocoDataset` supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation and
|
||||
Panoptic Segmentation of 2017 Train/Val/Test dataset.
|
||||
|
||||
The generated dataset has multi-columns :
|
||||
|
||||
|
@ -4339,11 +4337,12 @@ class CocoDataset(MappableDataset):
|
|||
|
||||
class CelebADataset(MappableDataset):
|
||||
"""
|
||||
A source dataset for reading and parsing CelebA dataset. Currently supported: list_attr_celeba.txt only.
|
||||
A source dataset for reading and parsing the CelebA dataset. Currently only `list_attr_celeba.txt` is supported,
|
||||
which contains the attribute annotations of the dataset.
|
||||
|
||||
Note:
|
||||
The generated dataset has two columns ['image', 'attr'].
|
||||
The type of the image tensor is uint8. The attribute tensor is uint32 and one hot type.
|
||||
The image tensor is of the uint8 type. The attribute tensor is of the uint32 type and one hot encoded.
|
||||
|
||||
Citation of CelebA dataset.
|
||||
|
||||
|
@ -4376,20 +4375,20 @@ class CelebADataset(MappableDataset):
|
|||
|
||||
Args:
|
||||
dataset_dir (str): Path to the root directory that contains the dataset.
|
||||
num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in
|
||||
the config).
|
||||
shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None).
|
||||
usage (str): one of 'all', 'train', 'valid' or 'test'.
|
||||
usage (str, optional): One of 'all', 'train', 'valid' or 'test' (default='all', will read all samples).
|
||||
sampler (Sampler, optional): Object used to choose samples from the dataset (default=None).
|
||||
decode (bool, optional): decode the images after reading (default=False).
|
||||
extensions (list[str], optional): List of file extensions to be
|
||||
included in the dataset (default=None).
|
||||
num_samples (int, optional): The number of images to be included in the dataset.
|
||||
(default=None, all images).
|
||||
extensions (list[str], optional): List of file extensions to be included in the dataset (default=None).
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, will include all images).
|
||||
num_shards (int, optional): Number of shards that the dataset will be divided
|
||||
into (default=None). When this argument is specified, 'num_samples' reflects
|
||||
into (default=None). When this argument is specified, `num_samples` reflects
|
||||
the maximum number of samples per shard.
|
||||
shard_id (int, optional): The shard ID within num_shards (default=None). This
|
||||
argument can only be specified when num_shards is also specified.
|
||||
shard_id (int, optional): The shard ID within `num_shards` (default=None). This
|
||||
argument can only be specified when `num_shards` is also specified.
|
||||
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
|
||||
(default=None, which means no cache is used).
|
||||
|
||||
|
|
|
@ -63,6 +63,7 @@ class TextTensorOperation(TensorOperation):
|
|||
"""
|
||||
Base class of Text Tensor Ops
|
||||
"""
|
||||
|
||||
def __call__(self, input_tensor):
|
||||
if not isinstance(input_tensor, list):
|
||||
input_list = [input_tensor]
|
||||
|
@ -95,13 +96,11 @@ DE_C_INTER_JIEBA_MODE = {
|
|||
JiebaMode.HMM: cde.JiebaMode.DE_JIEBA_HMM
|
||||
}
|
||||
|
||||
|
||||
DE_C_INTER_SENTENCEPIECE_LOADTYPE = {
|
||||
SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE,
|
||||
SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL
|
||||
}
|
||||
|
||||
|
||||
DE_C_INTER_SENTENCEPIECE_OUTTYPE = {
|
||||
SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString,
|
||||
SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT
|
||||
|
@ -282,7 +281,7 @@ class Lookup(TextTensorOperation):
|
|||
vocab (Vocab): A vocabulary object.
|
||||
unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV).
|
||||
If unknown_token is OOV, a runtime error will be thrown (default=None).
|
||||
data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32)
|
||||
data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mindspore.int32)
|
||||
|
||||
Examples:
|
||||
>>> # Load vocabulary from list
|
||||
|
@ -309,18 +308,19 @@ class Ngram(TextTensorOperation):
|
|||
Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works.
|
||||
|
||||
Args:
|
||||
n (list[int]): n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result
|
||||
n (list[int]): n in n-gram, which is a list of positive integers. For example, if n=[4, 3], then the result
|
||||
would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up
|
||||
for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in
|
||||
an n-gram, an empty string will be returned. For example, 3 grams on ["mindspore", "best"] will result in
|
||||
an empty string produced.
|
||||
left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width
|
||||
will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None).
|
||||
right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence.
|
||||
pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--"
|
||||
(default=None).
|
||||
separator (str, optional): symbol used to join strings together. For example. if 2-gram is
|
||||
left_pad (tuple, optional): Padding performed on left side of the sequence shaped like ("pad_token", pad_width).
|
||||
`pad_width` will be capped at n-1. For example, specifying left_pad=("_", 2) would pad left side of the
|
||||
sequence with "__" (default=None).
|
||||
right_pad (tuple, optional): Padding performed on right side of the sequence shaped like
|
||||
("pad_token", pad_width). `pad_width` will be capped at n-1. For example, specifying right_pad=("-", 2)
|
||||
would pad right side of the sequence with "--" (default=None).
|
||||
separator (str, optional): Symbol used to join strings together. For example, if 2-gram is
|
||||
["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"]
|
||||
(default=None, which means whitespace is used).
|
||||
(default=None, which will use whitespace as separator).
|
||||
|
||||
Examples:
|
||||
>>> text_file_dataset = text_file_dataset.map(operations=text.Ngram(3, separator=""))
|
||||
|
@ -389,6 +389,7 @@ class SlidingWindow(TextTensorOperation):
|
|||
>>> # | [3,4,5]] |
|
||||
>>> # +--------------+
|
||||
"""
|
||||
|
||||
@check_slidingwindow
|
||||
def __init__(self, width, axis=0):
|
||||
self.width = width
|
||||
|
@ -557,6 +558,7 @@ class PythonTokenizer:
|
|||
tokens = self.tokenizer(in_array)
|
||||
return tokens
|
||||
|
||||
|
||||
if platform.system().lower() != 'windows':
|
||||
DE_C_INTER_NORMALIZE_FORM = {
|
||||
NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,
|
||||
|
@ -575,12 +577,12 @@ if platform.system().lower() != 'windows':
|
|||
BasicTokenizer is not supported on Windows platform yet.
|
||||
|
||||
Args:
|
||||
lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
|
||||
lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8 with `NFD` mode, RegexReplace operation
|
||||
on input text to fold the text to lower case and strip accented characters. If False, only apply
|
||||
NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
|
||||
keep_whitespace (bool, optional): If True, the whitespace will be kept in out tokens (default=False).
|
||||
NormalizeUTF8 operation with the specified mode on input text (default=False).
|
||||
keep_whitespace (bool, optional): If True, the whitespace will be kept in output tokens (default=False).
|
||||
normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode. This is
|
||||
only effective when 'lower_case' is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
|
||||
only effective when `lower_case` is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
|
||||
preserve_unused_token (bool, optional): If True, do not split special tokens like
|
||||
'[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
|
||||
with_offsets (bool, optional): Whether or not to output offsets of tokens (default=False).
|
||||
|
@ -637,14 +639,14 @@ if platform.system().lower() != 'windows':
|
|||
vocab (Vocab): A vocabulary object.
|
||||
suffix_indicator (str, optional): Used to show that the subword is the last part of a word (default='##').
|
||||
max_bytes_per_token (int, optional): Tokens exceeding this length will not be further split (default=100).
|
||||
unknown_token (str, optional): When a token cannot be found: if 'unknown_token' is empty string,
|
||||
return the token directly, else return 'unknown_token'(default='[UNK]').
|
||||
lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation
|
||||
unknown_token (str, optional): When an unknown token is found, return the token directly if `unknown_token`
|
||||
is an empty string, else return `unknown_token` instead (default='[UNK]').
|
||||
lower_case (bool, optional): If True, apply CaseFold, NormalizeUTF8 with `NFD` mode, RegexReplace operation
|
||||
on input text to fold the text to lower case and strip accented characters. If False, only apply
|
||||
NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
|
||||
NormalizeUTF8 operation with the specified mode on input text (default=False).
|
||||
keep_whitespace (bool, optional): If True, the whitespace will be kept in output tokens (default=False).
|
||||
normalization_form (NormalizeForm, optional): Used to specify a specific normalize mode,
|
||||
only effective when 'lower_case' is False. See NormalizeUTF8 for details (default='NONE').
|
||||
only effective when `lower_case` is False. See NormalizeUTF8 for details (default=NormalizeForm.NONE).
|
||||
preserve_unused_token (bool, optional): If True, do not split special tokens like
|
||||
'[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
|
||||
with_offsets (bool, optional): Whether or not to output offsets of tokens (default=False).
|
||||
|
@ -703,7 +705,8 @@ if platform.system().lower() != 'windows':
|
|||
|
||||
class CaseFold(TextTensorOperation):
|
||||
"""
|
||||
Apply case fold operation on UTF-8 string tensor.
|
||||
Apply case fold operation on UTF-8 string tensor, which is more aggressive than a plain lower-case conversion and can convert more characters into
|
||||
lower case.
|
||||
|
||||
Note:
|
||||
CaseFold is not supported on Windows platform yet.
|
||||
|
|
|
@ -59,23 +59,24 @@ class OneHot(cde.OneHotOp):
|
|||
|
||||
class Fill(cde.FillOp):
|
||||
"""
|
||||
Tensor operation to create a tensor filled with input scalar value.
|
||||
Tensor operation to fill all elements in the tensor with the specified value.
|
||||
The output tensor will have the same shape and type as the input tensor.
|
||||
|
||||
Args:
|
||||
fill_value (Union[str, bytes, int, float, bool]): Scalar value
|
||||
to fill created tensor with.
|
||||
to fill the tensor with.
|
||||
|
||||
Examples:
|
||||
>>> import numpy as np
|
||||
>>> from mindspore.dataset import GeneratorDataset
|
||||
>>> # Generate 1d int numpy array from 0 - 63
|
||||
>>> # generate a 1D integer numpy array from 0 to 4
|
||||
>>> def generator_1d():
|
||||
>>> for i in range(64):
|
||||
... for i in range(5):
|
||||
... yield (np.array([i]),)
|
||||
>>> generator_dataset = GeneratorDataset(generator_1d,column_names='col')
|
||||
>>> generator_dataset = ds.GeneratorDataset(generator_1d, column_names="col1")
|
||||
>>> # [[0], [1], [2], [3], [4]]
|
||||
>>> fill_op = c_transforms.Fill(3)
|
||||
>>> generator_dataset = generator_dataset.map(operations=fill_op)
|
||||
>>> # [[3], [3], [3], [3], [3]]
|
||||
"""
|
||||
|
||||
@check_fill_value
|
||||
|
@ -351,6 +352,8 @@ class Unique(cde.UniqueOp):
|
|||
>>> # +---------+-----------------+---------+
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Compose():
|
||||
"""
|
||||
Compose a list of transforms into a single transform.
|
||||
|
@ -376,6 +379,7 @@ class Compose():
|
|||
operations.append(op)
|
||||
return cde.ComposeOperation(operations)
|
||||
|
||||
|
||||
class RandomApply():
|
||||
"""
|
||||
Randomly perform a series of transforms with a given probability.
|
||||
|
|
|
@ -62,6 +62,7 @@ class ImageTensorOperation(TensorOperation):
|
|||
"""
|
||||
Base class of Image Tensor Ops
|
||||
"""
|
||||
|
||||
def __call__(self, input_tensor):
|
||||
if not isinstance(input_tensor, list):
|
||||
input_list = [input_tensor]
|
||||
|
@ -93,11 +94,9 @@ DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
|
|||
Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
|
||||
Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}
|
||||
|
||||
|
||||
DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
|
||||
ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}
|
||||
|
||||
|
||||
DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
|
||||
Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
|
||||
Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
|
||||
|
@ -307,6 +306,7 @@ class Equalize(ImageTensorOperation):
|
|||
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
|
||||
... input_columns=["image"])
|
||||
"""
|
||||
|
||||
def parse(self):
|
||||
return cde.EqualizeOperation()
|
||||
|
||||
|
@ -337,6 +337,7 @@ class Invert(ImageTensorOperation):
|
|||
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
|
||||
... input_columns=["image"])
|
||||
"""
|
||||
|
||||
def parse(self):
|
||||
return cde.InvertOperation()
|
||||
|
||||
|
@ -729,7 +730,7 @@ class RandomCrop(ImageTensorOperation):
|
|||
|
||||
class RandomCropDecodeResize(ImageTensorOperation):
|
||||
"""
|
||||
Equivalent to RandomResizedCrop, but crops before decodes.
|
||||
A combination of `Crop`, `Decode` and `Resize`. It will get better performance for JPEG images.
|
||||
|
||||
Args:
|
||||
size (Union[int, sequence]): The size of the output image.
|
||||
|
@ -813,7 +814,7 @@ class RandomCropWithBBox(ImageTensorOperation):
|
|||
|
||||
Examples:
|
||||
>>> decode_op = c_vision.Decode()
|
||||
>>> random_crop_with_bbox_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
|
||||
>>> random_crop_with_bbox_op = c_vision.RandomCropWithBBox([512, 512], [200, 200, 200, 200])
|
||||
>>> transforms_list = [decode_op, random_crop_with_bbox_op]
|
||||
>>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
|
||||
... input_columns=["image"])
|
||||
|
|
Loading…
Reference in New Issue