From a32883ed047e1deed000cfaccfcdbfdee8464703 Mon Sep 17 00:00:00 2001 From: ms_yan Date: Wed, 15 Dec 2021 15:10:18 +0800 Subject: [PATCH] fix api description error --- .../dataset/mindspore.dataset.MindDataset.rst | 6 +++--- mindspore/dataset/text/utils.py | 12 ++++++------ mindspore/dataset/transforms/c_transforms.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst b/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst index 8a40f312597..1f3ebc74d26 100644 --- a/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst +++ b/docs/api/api_python/dataset/mindspore.dataset.MindDataset.rst @@ -1,13 +1,13 @@ mindspore.dataset.MindDataset ============================== -.. py:class:: mindspore.dataset.MindDataset(dataset_file, columns_list=None, num_parallel_workers=None, shuffle=None, num_shards=None, shard_id=None, sampler=None, padded_sample=None, num_padded=None, num_samples=None, cache=None) +.. 
每个epoch中数据混洗的模式(默认为mindspore.dataset.Shuffle.GLOBAL)。
special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added). special_first(bool
+ encoding (str): Indicating the charset for encoding (default='utf8'). Returns: numpy.ndarray, NumPy array of `bytes`. diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index ddeec8c3b1b..ec320f39fb0 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -286,7 +286,7 @@ class PadEnd(TensorOperation): Args: pad_shape (list(int)): List of integers representing the shape needed. Dimensions that set to `None` will not be padded (i.e., original dim will be used). Shorter dimensions will truncate the values. - pad_value (Union[str, bytes, int, float, bool]), optional): Value used to pad. Default to 0 or empty + pad_value (Union[str, bytes, int, float, bool], optional): Value used to pad. Default to 0 or empty string in case of tensors of strings. Examples: