From a32883ed047e1deed000cfaccfcdbfdee8464703 Mon Sep 17 00:00:00 2001 From: ms_yan Date: Wed, 15 Dec 2021 15:10:18 +0800 Subject: [PATCH] fix api description error --- .../dataset/mindspore.dataset.MindDataset.rst | 6 +++--- mindspore/dataset/text/utils.py | 12 ++++++------ mindspore/dataset/transforms/c_transforms.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst b/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst index 8a40f312597..1f3ebc74d26 100644 --- a/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst +++ b/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst @@ -1,13 +1,13 @@ mindspore.dataset.MindDataset ============================== -.. py:class:: mindspore.dataset.MindDataset(dataset_file, columns_list=None, num_parallel_workers=None, shuffle=None, num_shards=None, shard_id=None, sampler=None, padded_sample=None, num_padded=None, num_samples=None, cache=None) +.. 
每个epoch中数据混洗的模式(默认为mindspore.dataset.Shuffle.GLOBAL)。
special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added). special_first(bool
+ encoding (str): Indicating the charset for encoding (default='utf8'). Returns: numpy.ndarray, NumPy array of `bytes`. diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index ddeec8c3b1b..ec320f39fb0 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -286,7 +286,7 @@ class PadEnd(TensorOperation): Args: pad_shape (list(int)): List of integers representing the shape needed. Dimensions that set to `None` will not be padded (i.e., original dim will be used). Shorter dimensions will truncate the values. - pad_value (Union[str, bytes, int, float, bool]), optional): Value used to pad. Default to 0 or empty + pad_value (Union[str, bytes, int, float, bool], optional): Value used to pad. Default to 0 or empty string in case of tensors of strings. Examples: