modify comment for api

2020-08-13 21:38:35 +08:00 · 2020-08-13 21:38:35 +08:00 · 501f549bc9
parent 8c377fd159
commit 501f549bc9
9 changed files with 29 additions and 21 deletions
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@ -266,9 +266,9 @@ class Dataset:
                be dropped and not propagated to the child node.
            num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel (default=None).
            per_batch_map (callable, optional): Per batch map callable. A callable which takes
-                (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represent a batch of
-                Tensors on a given column. The number of lists should match with number of entries in input_columns. The
-                last parameter of the callable should always be a BatchInfo object.
+                (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represents a batch
+                of Tensors on a given column. The number of lists should match with number of entries in input_columns.
+                The last parameter of the callable should always be a BatchInfo object.
            input_columns (list[str], optional): List of names of the input columns. The size of the list should
                match with signature of per_batch_map callable.
            pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
@ -2734,7 +2734,7 @@ class MnistDataset(MappableDataset):
        num_samples (int, optional): The number of images to be included in the dataset
            (default=None, all images).
        num_parallel_workers (int, optional): Number of workers to read the data
-            (default=value, set in the config).
+            (default=None, set in the config).
        shuffle (bool, optional): Whether or not to perform shuffle on the dataset
            (default=None, expected order behavior shown in the table).
        sampler (Sampler, optional): Object used to choose samples from the
@ -2811,7 +2811,7 @@ class MnistDataset(MappableDataset):

 class MindDataset(MappableDataset):
    """
-    A source dataset that reads from shard files and database.
+    A source dataset that reads MindRecord files.

    Args:
        dataset_file (Union[str, list[str]]): One of file names or file list in dataset.
@ -3138,7 +3138,7 @@ class _GeneratorWorker(multiprocessing.Process):

 class GeneratorDataset(MappableDataset):
    """
-    A source dataset that generate data from python by invoking python data source each epoch.
+    A source dataset that generates data from python by invoking python data source each epoch.

    This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table
    below shows what input args are allowed and their expected behavior.
@ -3351,7 +3351,7 @@ class TFRecordDataset(SourceDataset):

    Args:
        dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
-        pattern of files. The list will be sorted in a lexicographical order.
+            pattern of files. The list will be sorted in a lexicographical order.
        schema (Union[str, Schema], optional): Path to the json schema file or schema object (default=None).
            If the schema is not provided, the meta data from the TFData file is considered the schema.
        columns_list (list[str], optional): List of columns to be read (default=None, read all columns)
@ -3534,7 +3534,7 @@ class ManifestDataset(MappableDataset):

    Args:
        dataset_file (str): File to be read.
-        usage (str, optional): Need train, eval or inference data (default="train").
+        usage (str, optional): Acceptable usages include train, eval and inference (default="train").
        num_samples (int, optional): The number of images to be included in the dataset.
            (default=None, all images).
        num_parallel_workers (int, optional): Number of workers to read the data
--- a/mindspore/dataset/text/transforms.py
+++ b/mindspore/dataset/text/transforms.py
@ -72,6 +72,7 @@ class Lookup(cde.LookupOp):
    def __init__(self, vocab, unknown_token=None):
        super().__init__(vocab, unknown_token)

+
 class SlidingWindow(cde.SlidingWindowOp):
    """
    TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis
@ -101,6 +102,7 @@ class SlidingWindow(cde.SlidingWindowOp):
    def __init__(self, width, axis=0):
        super().__init__(width=width, axis=axis)

+
 class Ngram(cde.NgramOp):
    """
    TensorOp to generate n-gram from a 1-D string Tensor.
@ -511,8 +513,8 @@ if platform.system().lower() != 'windows':
                on input text to make the text to lower case and strip accents characters; If False, only apply
                NormalizeUTF8('normalization_form' mode) operation on input text(default=False).
            keep_whitespace(bool, optional): If True, the whitespace will be kept in out tokens(default=False).
-            normalization_form(NormalizeForm, optional): Used to specify a specific normlaize mode,
-                only effective when 'lower_case' is False. See NormalizeUTF8 for details(default='NONE').
+            normalization_form(NormalizeForm, optional): Used to specify a specific normalize mode,
+                only effective when 'lower_case' is False. See NormalizeUTF8 for details(default=NormalizeForm.NONE).
            preserve_unused_token(bool, optional): If True, do not split special tokens like
                '[CLS]', '[SEP]', '[UNK]', '[PAD]', '[MASK]'(default=True).
            with_offsets (bool, optional): If or not output offsets of tokens (default=False).
--- a/mindspore/dataset/text/utils.py
+++ b/mindspore/dataset/text/utils.py
@ -132,12 +132,13 @@ class Vocab(cde.Vocab):
        Build a vocab object from a dict.

        Args:
-            word_dict (dict): dict contains word, id pairs where word should be str and id int. id is recommended to
-                start from 0 and be continuous. ValueError will be raised if id is negative.
+            word_dict (dict): dict contains word and id pairs, where word should be str and id be int. id is recommended
+                to start from 0 and be continuous. ValueError will be raised if id is negative.
        """

        return super().from_dict(word_dict)

+
 class SentencePieceVocab(cde.SentencePieceVocab):
    """
    SentencePiece obiect that is used to segmentate words
@ -151,9 +152,9 @@ class SentencePieceVocab(cde.SentencePieceVocab):
        Args:
            dataset(Dataset): Dataset to build sentencepiece.
            col_names(list): The list of the col name.
-            vocab_size(int): Vocabulary size, the type of uint32_t.
+            vocab_size(int): Vocabulary size.
            character_coverage(float): Amount of characters covered by the model, good defaults are: 0.9995 for
-                languages. with rich character set like Japanse or Chinese and 1.0 for other languages with small
+                languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
                character set.
            model_type(SentencePieceModel): Choose from unigram (default), bpe, char, or word. The input sentence
                must be pretokenized when using word type.
@ -261,6 +262,7 @@ class NormalizeForm(IntEnum):
    NFD = 3
    NFKD = 4

+
 class SentencePieceModel(IntEnum):
    """An enumeration for SentencePieceModel, effective enumeration types are UNIGRAM, BPE, CHAR, WORD."""
    UNIGRAM = 0
@ -275,11 +277,13 @@ DE_C_INTER_SENTENCEPIECE_MODE = {
    SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
 }

+
 class SPieceTokenizerOutType(IntEnum):
    """An enumeration for SPieceTokenizerOutType, effective enumeration types are STRING, INT."""
    STRING = 0
    INT = 1

+
 class SPieceTokenizerLoadType(IntEnum):
    """An enumeration for SPieceTokenizerLoadType, effective enumeration types are FILE, MODEL."""
    FILE = 0
--- a/mindspore/dataset/transforms/c_transforms.py
+++ b/mindspore/dataset/transforms/c_transforms.py
@ -204,7 +204,7 @@ class Concatenate(cde.ConcatenateOp):
    Tensor operation that concatenates all columns into a single tensor.

    Args:
-        axis (int, optional): axis to concatenate the tensors along (Default=0).
+        axis (int, optional): concatenate the tensors along given axis (Default=0).
        prepend (numpy.array, optional): numpy array to be prepended to the already concatenated tensors (Default=None).
        append (numpy.array, optional): numpy array to be appended to the already concatenated tensors (Default=None).
    """
--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@ -188,8 +188,8 @@ class Normalize(cde.NormalizeOp):
    Normalize the input image with respect to mean and standard deviation.

    Args:
-        mean (sequence): List or tuple of mean values for each channel, w.r.t channel order.
-        std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order.
+        mean (sequence): List or tuple of mean values for each channel, with respect to channel order.
+        std (sequence): List or tuple of standard deviations for each channel, with respect to channel order.
    """

    @check_normalize_c
--- a/mindspore/mindrecord/filereader.py
+++ b/mindspore/mindrecord/filereader.py
@ -23,6 +23,7 @@ from .common.exceptions import ParamValueError, ParamTypeError

 __all__ = ['FileReader']

+
 class FileReader:
    """
    Class to read MindRecord File series.
@ -31,7 +32,7 @@ class FileReader:
       file_name (str, list[str]): One of MindRecord File or file list.
       num_consumer(int, optional): Number of consumer threads which load data to memory (default=4).
           It should not be smaller than 1 or larger than the number of CPU.
-       columns (list[str], optional): List of fields which correspond data would be read (default=None).
+       columns (list[str], optional): List of fields whose corresponding data would be read (default=None).
       operator(int, optional): Reserved parameter for operators (default=None).

    Raises:
--- a/mindspore/mindrecord/filewriter.py
+++ b/mindspore/mindrecord/filewriter.py
@ -275,7 +275,7 @@ class FileWriter:

    def commit(self):
        """
-        Flush data to disk and generate the correspond db files.
+        Flush data to disk and generate the corresponding db files.

        Returns:
            MSRStatus, SUCCESS or FAILED.
--- a/mindspore/mindrecord/tools/imagenet_to_mr.py
+++ b/mindspore/mindrecord/tools/imagenet_to_mr.py
@ -25,12 +25,13 @@ from ..shardutils import check_filename

 __all__ = ['ImageNetToMR']

+
 class ImageNetToMR:
    """
    Class is for transformation from imagenet to MindRecord.

    Args:
-        map_file (str): the map file which indicate label.
+        map_file (str): the map file which indicates label.
                  the map file content should like this:

                  .. code-block::
--- a/mindspore/mindrecord/tools/mnist_to_mr.py
+++ b/mindspore/mindrecord/tools/mnist_to_mr.py
@ -37,7 +37,7 @@ class MnistToMR:
    Class is for transformation from Mnist to MindRecord.

    Args:
-        source (str): directory which contain t10k-images-idx3-ubyte.gz,
+        source (str): directory which contains t10k-images-idx3-ubyte.gz,
                      train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz,
                      train-labels-idx1-ubyte.gz.
        destination (str): the MindRecord file directory to transform into.