Modify comments for API

ms_yan 2020-08-13 21:38:35 +08:00
parent 8c377fd159
commit 501f549bc9
9 changed files with 29 additions and 21 deletions

View File

@@ -266,9 +266,9 @@ class Dataset:
be dropped and not propagated to the child node.
num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel (default=None).
per_batch_map (callable, optional): Per batch map callable. A callable which takes
(list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represent a batch of
Tensors on a given column. The number of lists should match with number of entries in input_columns. The
last parameter of the callable should always be a BatchInfo object.
(list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch
of Tensors on a given column. The number of lists should match the number of entries in input_columns.
The last parameter of the callable should always be a BatchInfo object.
input_columns (list[str], optional): List of names of the input columns. The size of the list should
match the signature of the per_batch_map callable.
pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
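To make the per_batch_map contract concrete, here is a minimal sketch; the generator and the column name are invented for illustration. The callable receives one list of arrays per entry in input_columns, plus the BatchInfo object, and returns a tuple of lists:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(4):
            yield (np.array([i]),)

    def add_one(col1, batch_info):
        # one list of arrays per entry in input_columns, plus the BatchInfo object
        return ([x + 1 for x in col1],)

    data = ds.GeneratorDataset(gen, column_names=["col1"])
    data = data.batch(batch_size=2, per_batch_map=add_one, input_columns=["col1"])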
@@ -2734,7 +2734,7 @@ class MnistDataset(MappableDataset):
num_samples (int, optional): The number of images to be included in the dataset
(default=None, all images).
num_parallel_workers (int, optional): Number of workers to read the data
(default=value, set in the config).
(default=None, set in the config).
shuffle (bool, optional): Whether or not to perform shuffle on the dataset
(default=None, expected order behavior shown in the table).
sampler (Sampler, optional): Object used to choose samples from the
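For example, a minimal MnistDataset read; the path is a placeholder, and leaving num_parallel_workers unset picks up the value from the config, as the corrected default note says:

    import mindspore.dataset as ds

    # num_parallel_workers=None falls back to the value set in the config
    mnist_ds = ds.MnistDataset(dataset_dir="/path/to/mnist", num_samples=100,
                               shuffle=True)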
@@ -2811,7 +2811,7 @@ class MnistDataset(MappableDataset):
class MindDataset(MappableDataset):
"""
A source dataset that reads from shard files and database.
A source dataset that reads MindRecord files.
Args:
dataset_file (Union[str, list[str]]): One of the dataset file names or a list of file names.
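A sketch of the reworded behavior, with a hypothetical file name; dataset_file also accepts a list of MindRecord file names:

    import mindspore.dataset as ds

    data = ds.MindDataset(dataset_file="/path/to/data.mindrecord")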
@@ -3138,7 +3138,7 @@ class _GeneratorWorker(multiprocessing.Process):
class GeneratorDataset(MappableDataset):
"""
A source dataset that generate data from python by invoking python data source each epoch.
A source dataset that generates data from Python by invoking the Python data source each epoch.
This dataset can take in a sampler. sampler and shuffle are mutually exclusive. The table
below shows what input args are allowed and their expected behavior.
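A short sketch of the mutual exclusion; the generator and column name are illustrative. Pass either shuffle or a sampler, never both:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(5):
            yield (np.array([i]),)

    # shuffle and sampler are mutually exclusive: use one or the other
    data = ds.GeneratorDataset(gen, column_names=["data"], shuffle=False)
    # data = ds.GeneratorDataset(gen, column_names=["data"], sampler=ds.SequentialSampler())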
@@ -3351,7 +3351,7 @@ class TFRecordDataset(SourceDataset):
Args:
dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
pattern of files. The list will be sorted in a lexicographical order.
pattern of files. The list will be sorted in a lexicographical order.
schema (Union[str, Schema], optional): Path to the json schema file or schema object (default=None).
If the schema is not provided, the metadata from the TFData file is considered the schema.
columns_list (list[str], optional): List of columns to be read (default=None, read all columns)
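For instance, with hypothetical file names; omitting schema lets the metadata stored in the TFRecord files act as the schema:

    import mindspore.dataset as ds

    data = ds.TFRecordDataset(dataset_files=["/path/a.tfrecord", "/path/b.tfrecord"],
                              columns_list=["image", "label"])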
@@ -3534,7 +3534,7 @@ class ManifestDataset(MappableDataset):
Args:
dataset_file (str): File to be read.
usage (str, optional): Need train, eval or inference data (default="train").
usage (str, optional): Acceptable usages include "train", "eval" and "inference" (default="train").
num_samples (int, optional): The number of images to be included in the dataset
(default=None, all images).
num_parallel_workers (int, optional): Number of workers to read the data
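A sketch of the reworded usage argument, with a hypothetical manifest path:

    import mindspore.dataset as ds

    # usage selects the split: "train", "eval" or "inference"
    data = ds.ManifestDataset(dataset_file="/path/to/manifest.json", usage="eval")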

View File

@@ -72,6 +72,7 @@ class Lookup(cde.LookupOp):
def __init__(self, vocab, unknown_token=None):
super().__init__(vocab, unknown_token)
class SlidingWindow(cde.SlidingWindowOp):
"""
TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis
@@ -101,6 +102,7 @@ class SlidingWindow(cde.SlidingWindowOp):
def __init__(self, width, axis=0):
super().__init__(width=width, axis=axis)
class Ngram(cde.NgramOp):
"""
TensorOp to generate n-gram from a 1-D string Tensor.
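To illustrate the SlidingWindow op from the hunk above, a minimal sketch with invented input data:

    import numpy as np
    import mindspore.dataset as ds
    import mindspore.dataset.text as text

    def gen():
        yield (np.array([1, 2, 3, 4, 5]),)

    data = ds.GeneratorDataset(gen, column_names=["number"])
    # width=3 on axis 0 turns [1, 2, 3, 4, 5] into [[1, 2, 3], [2, 3, 4], [3, 4, 5]]
    data = data.map(operations=text.SlidingWindow(3, 0), input_columns=["number"])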
@@ -511,8 +513,8 @@ if platform.system().lower() != 'windows':
on input text to make the text lower case and strip accent characters; If False, only apply
NormalizeUTF8('normalization_form' mode) operation on input text (default=False).
keep_whitespace(bool, optional): If True, the whitespace will be kept in output tokens (default=False).
normalization_form(NormalizeForm, optional): Used to specify a specific normlaize mode,
only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE').
normalization_form(NormalizeForm, optional): Used to specify a specific normalize mode,
only effective when 'lower_case' is False. See NormalizeUTF8 for details(default=NormalizeForm.NONE).
preserve_unused_token(bool, optional): If True, do not split special tokens like
'[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]' (default=True).
with_offsets (bool, optional): Whether or not to output offsets of tokens (default=False).
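Judging by this parameter set, the class in the hunk is BasicTokenizer (BertTokenizer shares the same flags); a sketch assuming that:

    import mindspore.dataset.text as text
    from mindspore.dataset.text import NormalizeForm

    # not available on Windows, per the platform check above
    tokenizer = text.BasicTokenizer(lower_case=False,
                                    normalization_form=NormalizeForm.NFKC,
                                    preserve_unused_token=True,
                                    with_offsets=False)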

View File

@@ -132,12 +132,13 @@ class Vocab(cde.Vocab):
Build a vocab object from a dict.
Args:
word_dict (dict): dict contains word, id pairs where word should be str and id int. id is recommended to
start from 0 and be continuous. ValueError will be raised if id is negative.
word_dict (dict): dict contains word and id pairs, where word should be str and id be int. id is recommended
to start from 0 and be continuous. ValueError will be raised if id is negative.
"""
return super().from_dict(word_dict)
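A quick sketch of from_dict together with the Lookup op from the earlier hunk; the words and ids are invented:

    import mindspore.dataset.text as text

    # ids are recommended to start from 0 and be continuous; a negative id raises ValueError
    vocab = text.Vocab.from_dict({"<unk>": 0, "hello": 1, "world": 2})
    lookup = text.Lookup(vocab, unknown_token="<unk>")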
class SentencePieceVocab(cde.SentencePieceVocab):
"""
SentencePiece object that is used to segment words
@@ -151,9 +152,9 @@ class SentencePieceVocab(cde.SentencePieceVocab):
Args:
dataset(Dataset): Dataset to build sentencepiece.
col_names(list): The list of the column names.
vocab_size(int): Vocabulary size, the type of uint32_t.
vocab_size(int): Vocabulary size.
character_coverage(float): Amount of characters covered by the model, good defaults are: 0.9995 for
languages. with rich character set like Japanse or Chinese and 1.0 for other languages with small
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
character set.
model_type(SentencePieceModel): Choose from unigram (default), bpe, char, or word. The input sentence
must be pretokenized when using word type.
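For example, building a vocab from a text corpus; the path is hypothetical, and TextFileDataset exposes a "text" column:

    import mindspore.dataset as ds
    import mindspore.dataset.text as text
    from mindspore.dataset.text import SentencePieceModel

    corpus = ds.TextFileDataset("/path/to/corpus.txt", shuffle=False)
    vocab = text.SentencePieceVocab.from_dataset(corpus, ["text"], 5000, 0.9995,
                                                 SentencePieceModel.UNIGRAM, {})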
@@ -261,6 +262,7 @@ class NormalizeForm(IntEnum):
NFD = 3
NFKD = 4
class SentencePieceModel(IntEnum):
"""An enumeration for SentencePieceModel, effective enumeration types are UNIGRAM, BPE, CHAR, WORD."""
UNIGRAM = 0
@@ -275,11 +277,13 @@ DE_C_INTER_SENTENCEPIECE_MODE = {
SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
}
class SPieceTokenizerOutType(IntEnum):
"""An enumeration for SPieceTokenizerOutType, effective enumeration types are STRING, INT."""
STRING = 0
INT = 1
class SPieceTokenizerLoadType(IntEnum):
"""An enumeration for SPieceTokenizerLoadType, effective enumeration types are FILE, MODEL."""
FILE = 0
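These enums are consumed by the SentencePiece tokenizer; a hedged sketch with a hypothetical model path (the import path is assumed from this file's exports):

    import mindspore.dataset.text as text
    from mindspore.dataset.text import SPieceTokenizerOutType

    # emit string pieces rather than integer ids
    tokenizer = text.SentencePieceTokenizer("/path/to/spm.model",
                                            out_type=SPieceTokenizerOutType.STRING)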

View File

@@ -204,7 +204,7 @@ class Concatenate(cde.ConcatenateOp):
Tensor operation that concatenates all columns into a single tensor.
Args:
axis (int, optional): axis to concatenate the tensors along (Default=0).
axis (int, optional): Concatenate the tensors along the given axis (Default=0).
prepend (numpy.array, optional): numpy array to be prepended to the already concatenated tensors (Default=None).
append (numpy.array, optional): numpy array to be appended to the already concatenated tensors (Default=None).
"""

View File

@@ -188,8 +188,8 @@ class Normalize(cde.NormalizeOp):
Normalize the input image with respect to mean and standard deviation.
Args:
mean (sequence): List or tuple of mean values for each channel, w.r.t channel order.
std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order.
mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
"""
@check_normalize_c
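A sketch with placeholder channel statistics; the module path is as in recent releases (older ones nested vision under transforms):

    import mindspore.dataset.vision.c_transforms as c_vision

    # mean/std are per-channel and must follow the channel order of the image
    normalize_op = c_vision.Normalize(mean=[121.0, 115.0, 100.0],
                                      std=[70.0, 68.0, 71.0])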

View File

@@ -23,6 +23,7 @@ from .common.exceptions import ParamValueError, ParamTypeError
__all__ = ['FileReader']
class FileReader:
"""
Class to read MindRecord File series.
@@ -31,7 +32,7 @@ class FileReader:
file_name (str, list[str]): One of the MindRecord file names or a file list.
num_consumer(int, optional): Number of consumer threads which load data to memory (default=4).
It should not be smaller than 1 or larger than the number of CPUs.
columns (list[str], optional): List of fields which correspond data would be read (default=None).
columns (list[str], optional): List of fields which corresponding data would be read (default=None).
operator(int, optional): Reserved parameter for operators (default=None).
Raises:
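Beyond the truncated Raises section, a minimal read loop looks like this; the file name and column names are hypothetical:

    from mindspore.mindrecord import FileReader

    reader = FileReader(file_name="/path/to/data.mindrecord", num_consumer=4,
                        columns=["data", "label"])
    for item in reader.get_next():
        pass  # each item is a dict keyed by the requested columns
    reader.close()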

View File

@@ -275,7 +275,7 @@ class FileWriter:
def commit(self):
"""
Flush data to disk and generate the correspond db files.
Flush data to disk and generate the corresponding db files.
Returns:
MSRStatus, SUCCESS or FAILED.
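A typical write-then-commit flow; the schema and records are invented for illustration:

    from mindspore.mindrecord import FileWriter

    writer = FileWriter(file_name="/path/out.mindrecord", shard_num=1)
    writer.add_schema({"data": {"type": "int32"}}, "hypothetical schema")
    writer.write_raw_data([{"data": 1}, {"data": 2}])
    writer.commit()  # flush to disk and generate the corresponding .db files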

View File

@@ -25,12 +25,13 @@ from ..shardutils import check_filename
__all__ = ['ImageNetToMR']
class ImageNetToMR:
"""
Class is for transformation from ImageNet to MindRecord.
Args:
map_file (str): the map file which indicate label.
map_file (str): the map file which indicates label.
the map file content should look like this:
.. code-block::
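The map file sample itself is truncated in this hunk; for context, a hedged end-to-end call might look like this, with all paths hypothetical:

    from mindspore.mindrecord import ImageNetToMR

    transformer = ImageNetToMR(map_file="/path/map.txt",
                               image_dir="/path/to/images",
                               destination="/path/imagenet.mindrecord",
                               partition_number=1)
    transformer.transform()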

View File

@@ -37,7 +37,7 @@ class MnistToMR:
Class is for transformation from Mnist to MindRecord.
Args:
source (str): directory which contain t10k-images-idx3-ubyte.gz,
source (str): directory which contains t10k-images-idx3-ubyte.gz,
train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz,
train-labels-idx1-ubyte.gz.
destination (str): the MindRecord file directory to transform into.
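To round out the hunk, a hedged usage sketch; the paths are hypothetical and source must hold the four .gz files listed above:

    from mindspore.mindrecord import MnistToMR

    transformer = MnistToMR(source="/path/to/mnist_gz",
                            destination="/path/mnist.mindrecord")
    transformer.transform()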