forked from mindspore-Ecosystem/mindspore
modify comment for api
This commit is contained in:
parent
8c377fd159
commit
501f549bc9
|
@ -266,9 +266,9 @@ class Dataset:
|
|||
be dropped and not propagated to the child node.
|
||||
num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel (default=None).
|
||||
per_batch_map (callable, optional): Per batch map callable. A callable which takes
|
||||
(list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represent a batch of
|
||||
Tensors on a given column. The number of lists should match with number of entries in input_columns. The
|
||||
last parameter of the callable should always be a BatchInfo object.
|
||||
(list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch
|
||||
of Tensors on a given column. The number of lists should match with number of entries in input_columns.
|
||||
The last parameter of the callable should always be a BatchInfo object.
|
||||
input_columns (list[str], optional): List of names of the input columns. The size of the list should
|
||||
match with signature of per_batch_map callable.
|
||||
pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
|
||||
|
@ -2734,7 +2734,7 @@ class MnistDataset(MappableDataset):
|
|||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
(default=value, set in the config).
|
||||
(default=None, set in the config).
|
||||
shuffle (bool, optional): Whether or not to perform shuffle on the dataset
|
||||
(default=None, expected order behavior shown in the table).
|
||||
sampler (Sampler, optional): Object used to choose samples from the
|
||||
|
@ -2811,7 +2811,7 @@ class MnistDataset(MappableDataset):
|
|||
|
||||
class MindDataset(MappableDataset):
|
||||
"""
|
||||
A source dataset that reads from shard files and database.
|
||||
A source dataset that reads MindRecord files.
|
||||
|
||||
Args:
|
||||
dataset_file (Union[str, list[str]]): One of file names or file list in dataset.
|
||||
|
@ -3138,7 +3138,7 @@ class _GeneratorWorker(multiprocessing.Process):
|
|||
|
||||
class GeneratorDataset(MappableDataset):
|
||||
"""
|
||||
A source dataset that generate data from python by invoking python data source each epoch.
|
||||
A source dataset that generates data from python by invoking python data source each epoch.
|
||||
|
||||
This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table
|
||||
below shows what input args are allowed and their expected behavior.
|
||||
|
@ -3351,7 +3351,7 @@ class TFRecordDataset(SourceDataset):
|
|||
|
||||
Args:
|
||||
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
|
||||
pattern of files. The list will be sorted in a lexicographical order.
|
||||
pattern of files. The list will be sorted in a lexicographical order.
|
||||
schema (Union[str, Schema], optional): Path to the json schema file or schema object (default=None).
|
||||
If the schema is not provided, the meta data from the TFData file is considered the schema.
|
||||
columns_list (list[str], optional): List of columns to be read (default=None, read all columns)
|
||||
|
@ -3534,7 +3534,7 @@ class ManifestDataset(MappableDataset):
|
|||
|
||||
Args:
|
||||
dataset_file (str): File to be read.
|
||||
usage (str, optional): Need train, eval or inference data (default="train").
|
||||
usage (str, optional): acceptable usages include train, eval and inference (default="train").
|
||||
num_samples (int, optional): The number of images to be included in the dataset
|
||||
(default=None, all images).
|
||||
num_parallel_workers (int, optional): Number of workers to read the data
|
||||
|
|
|
@ -72,6 +72,7 @@ class Lookup(cde.LookupOp):
|
|||
def __init__(self, vocab, unknown_token=None):
|
||||
super().__init__(vocab, unknown_token)
|
||||
|
||||
|
||||
class SlidingWindow(cde.SlidingWindowOp):
|
||||
"""
|
||||
TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis
|
||||
|
@ -101,6 +102,7 @@ class SlidingWindow(cde.SlidingWindowOp):
|
|||
def __init__(self, width, axis=0):
|
||||
super().__init__(width=width, axis=axis)
|
||||
|
||||
|
||||
class Ngram(cde.NgramOp):
|
||||
"""
|
||||
TensorOp to generate n-gram from a 1-D string Tensor.
|
||||
|
@ -511,8 +513,8 @@ if platform.system().lower() != 'windows':
|
|||
on input text to convert the text to lower case and strip accent characters; if False, only apply
|
||||
NormalizeUTF8('normalization_form' mode) operation on input text(default=False).
|
||||
keep_whitespace(bool, optional): If True, the whitespace will be kept in output tokens(default=False).
|
||||
normalization_form(NormalizeForm, optional): Used to specify a specific normlaize mode,
|
||||
only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE').
|
||||
normalization_form(NormalizeForm, optional): Used to specify a specific normalize mode,
|
||||
only effective when 'lower_case' is False. See NormalizeUTF8 for details(default=NormalizeForm.NONE).
|
||||
preserve_unused_token(bool, optional): If True, do not split special tokens like
|
||||
'[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True).
|
||||
with_offsets (bool, optional): Whether or not to output offsets of tokens (default=False).
|
||||
|
|
|
@ -132,12 +132,13 @@ class Vocab(cde.Vocab):
|
|||
Build a vocab object from a dict.
|
||||
|
||||
Args:
|
||||
word_dict (dict): dict contains word, id pairs where word should be str and id int. id is recommended to
|
||||
start from 0 and be continuous. ValueError will be raised if id is negative.
|
||||
word_dict (dict): dict contains word and id pairs, where word should be str and id be int. id is recommended
|
||||
to start from 0 and be continuous. ValueError will be raised if id is negative.
|
||||
"""
|
||||
|
||||
return super().from_dict(word_dict)
|
||||
|
||||
|
||||
class SentencePieceVocab(cde.SentencePieceVocab):
|
||||
"""
|
||||
SentencePiece object that is used to segment words
|
||||
|
@ -151,9 +152,9 @@ class SentencePieceVocab(cde.SentencePieceVocab):
|
|||
Args:
|
||||
dataset(Dataset): Dataset to build sentencepiece.
|
||||
col_names(list): The list of the col name.
|
||||
vocab_size(int): Vocabulary size, the type of uint32_t.
|
||||
vocab_size(int): Vocabulary size.
|
||||
character_coverage(float): Amount of characters covered by the model, good defaults are: 0.9995 for
|
||||
languages. with rich character set like Japanse or Chinese and 1.0 for other languages with small
|
||||
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
|
||||
character set.
|
||||
model_type(SentencePieceModel): Choose from unigram (default), bpe, char, or word. The input sentence
|
||||
must be pretokenized when using word type.
|
||||
|
@ -261,6 +262,7 @@ class NormalizeForm(IntEnum):
|
|||
NFD = 3
|
||||
NFKD = 4
|
||||
|
||||
|
||||
class SentencePieceModel(IntEnum):
|
||||
"""An enumeration for SentencePieceModel, effective enumeration types are UNIGRAM, BPE, CHAR, WORD."""
|
||||
UNIGRAM = 0
|
||||
|
@ -275,11 +277,13 @@ DE_C_INTER_SENTENCEPIECE_MODE = {
|
|||
SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
|
||||
}
|
||||
|
||||
|
||||
class SPieceTokenizerOutType(IntEnum):
|
||||
"""An enumeration for SPieceTokenizerOutType, effective enumeration types are STRING, INT."""
|
||||
STRING = 0
|
||||
INT = 1
|
||||
|
||||
|
||||
class SPieceTokenizerLoadType(IntEnum):
|
||||
"""An enumeration for SPieceTokenizerLoadType, effective enumeration types are FILE, MODEL."""
|
||||
FILE = 0
|
||||
|
|
|
@ -204,7 +204,7 @@ class Concatenate(cde.ConcatenateOp):
|
|||
Tensor operation that concatenates all columns into a single tensor.
|
||||
|
||||
Args:
|
||||
axis (int, optional): axis to concatenate the tensors along (Default=0).
|
||||
axis (int, optional): concatenate the tensors along given axis (Default=0).
|
||||
prepend (numpy.array, optional): numpy array to be prepended to the already concatenated tensors (Default=None).
|
||||
append (numpy.array, optional): numpy array to be appended to the already concatenated tensors (Default=None).
|
||||
"""
|
||||
|
|
|
@ -188,8 +188,8 @@ class Normalize(cde.NormalizeOp):
|
|||
Normalize the input image with respect to mean and standard deviation.
|
||||
|
||||
Args:
|
||||
mean (sequence): List or tuple of mean values for each channel, w.r.t channel order.
|
||||
std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order.
|
||||
mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
|
||||
std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
|
||||
"""
|
||||
|
||||
@check_normalize_c
|
||||
|
|
|
@ -23,6 +23,7 @@ from .common.exceptions import ParamValueError, ParamTypeError
|
|||
|
||||
__all__ = ['FileReader']
|
||||
|
||||
|
||||
class FileReader:
|
||||
"""
|
||||
Class to read MindRecord File series.
|
||||
|
@ -31,7 +32,7 @@ class FileReader:
|
|||
file_name (str, list[str]): One of MindRecord File or file list.
|
||||
num_consumer(int, optional): Number of consumer threads which load data to memory (default=4).
|
||||
It should not be smaller than 1 or larger than the number of CPU.
|
||||
columns (list[str], optional): List of fields which correspond data would be read (default=None).
|
||||
columns (list[str], optional): List of fields whose corresponding data would be read (default=None).
|
||||
operator(int, optional): Reserved parameter for operators (default=None).
|
||||
|
||||
Raises:
|
||||
|
|
|
@ -275,7 +275,7 @@ class FileWriter:
|
|||
|
||||
def commit(self):
|
||||
"""
|
||||
Flush data to disk and generate the correspond db files.
|
||||
Flush data to disk and generate the corresponding db files.
|
||||
|
||||
Returns:
|
||||
MSRStatus, SUCCESS or FAILED.
|
||||
|
|
|
@ -25,12 +25,13 @@ from ..shardutils import check_filename
|
|||
|
||||
__all__ = ['ImageNetToMR']
|
||||
|
||||
|
||||
class ImageNetToMR:
|
||||
"""
|
||||
Class is for transformation from imagenet to MindRecord.
|
||||
|
||||
Args:
|
||||
map_file (str): the map file which indicate label.
|
||||
map_file (str): the map file which indicates label.
|
||||
the map file content should look like this:
|
||||
|
||||
.. code-block::
|
||||
|
|
|
@ -37,7 +37,7 @@ class MnistToMR:
|
|||
Class is for transformation from Mnist to MindRecord.
|
||||
|
||||
Args:
|
||||
source (str): directory which contain t10k-images-idx3-ubyte.gz,
|
||||
source (str): directory which contains t10k-images-idx3-ubyte.gz,
|
||||
train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz,
|
||||
train-labels-idx1-ubyte.gz.
|
||||
destination (str): the MindRecord file directory to transform into.
|
||||
|
|
Loading…
Reference in New Issue