forked from mindspore-Ecosystem/mindspore
!2716 del JiebaMode and NormalizeForm from python api doc
Merge pull request !2716 from qianlong21st/fix_python_api
commit cf4d317d3e
@@ -24,7 +24,7 @@ from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm
 __all__ = [
     "Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
-    "to_str", "to_bytes", "JiebaMode", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
+    "to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
     "PythonTokenizer"
 ]

@@ -33,4 +33,4 @@ if platform.system().lower() != 'windows':
         RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer

     __all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
-                    "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer", "NormalizeForm"])
+                    "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
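
The practical effect of the two hunks above is that JiebaMode and NormalizeForm disappear from the package's advertised API, while the unchanged "from .utils import ..." context line keeps them reachable as attributes. A minimal sketch of that behaviour (the package path mindspore.dataset.text is an assumption based on the surrounding code, not stated in this diff):

# Illustrative only: the package path mindspore.dataset.text is an assumption.
import mindspore.dataset.text as text

print("JiebaMode" in text.__all__)        # expected: False after this change
print("NormalizeForm" in text.__all__)    # expected: False after this change

# The unchanged "from .utils import ..." line still re-exports the enums,
# so attribute access keeps working:
print(text.JiebaMode.MIX, text.NormalizeForm.NFKC)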
@@ -119,10 +119,12 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
             the dictionary can be obtained on the official website of cppjieba.
         mp_path (str): the dictionary file is used by MPSegment algorithm,
             the dictionary can be obtained on the official website of cppjieba.
-        mode (JiebaMode, optional): "MP" model will tokenize with MPSegment algorithm,
-            "HMM" mode will tokenize with Hiddel Markov Model Segment algorithm,
-            "MIX" model will tokenize with a mix of MPSegment and HMMSegment algorithm
-            (default="MIX").
+        mode (JiebaMode, optional): Valid values can be any of [JiebaMode.MP, JiebaMode.HMM,
+            JiebaMode.MIX] (default=JiebaMode.MIX).
+
+            - JiebaMode.MP, tokenize with MPSegment algorithm.
+            - JiebaMode.HMM, tokenize with Hidden Markov Model Segment algorithm.
+            - JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
     """

     @check_jieba_init
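
For reference, a small usage sketch of the mode argument the rewritten docstring describes. It is not part of the commit: the dictionary paths are placeholders and the hmm_path keyword name is an assumption (this hunk only shows mp_path).

# Illustrative only, not from the commit: the dictionary paths are placeholders
# and the hmm_path keyword name is an assumption (only mp_path is visible here).
from mindspore.dataset.text import JiebaTokenizer, JiebaMode

HMM_DICT = "/path/to/hmm_model.utf8"   # hypothetical cppjieba HMM dictionary
MP_DICT = "/path/to/jieba.dict.utf8"   # hypothetical cppjieba MP dictionary

mp_only = JiebaTokenizer(hmm_path=HMM_DICT, mp_path=MP_DICT, mode=JiebaMode.MP)
hmm_only = JiebaTokenizer(hmm_path=HMM_DICT, mp_path=MP_DICT, mode=JiebaMode.HMM)
mixed = JiebaTokenizer(hmm_path=HMM_DICT, mp_path=MP_DICT)   # default: JiebaMode.MIX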
@@ -287,10 +289,16 @@ if platform.system().lower() != 'windows':
         Apply normalize operation on utf-8 string tensor.

         Args:
-            normalize_form (NormalizeForm, optional): Valid values are "NONE", "NFC", "NFKC", "NFD", "NFKD".
-                If set "NONE", will do nothing for input string tensor.
-                If set to any of "NFC", "NFKC", "NFD", "NFKD", will apply normalize operation(default="NFKC").
-                See http://unicode.org/reports/tr15/ for details.
+            normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE,
+                NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD,
+                NormalizeForm.NFKD] (default=NormalizeForm.NFKC).
+                See http://unicode.org/reports/tr15/ for details.
+
+                - NormalizeForm.NONE, do nothing for input string tensor.
+                - NormalizeForm.NFC, normalize with Normalization Form C.
+                - NormalizeForm.NFKC, normalize with Normalization Form KC.
+                - NormalizeForm.NFD, normalize with Normalization Form D.
+                - NormalizeForm.NFKD, normalize with Normalization Form KD.
         """

         def __init__(self, normalize_form=NormalizeForm.NFKC):
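
The __init__ signature shown as context at the end of this hunk makes the construction pattern clear. A short sketch of the common forms (the import path is assumed, and the op only exists when the platform is not Windows because of the guard above):

# Constructor usage mirroring the __init__ shown above; import path assumed.
from mindspore.dataset.text import NormalizeUTF8, NormalizeForm

nfc_op = NormalizeUTF8(normalize_form=NormalizeForm.NFC)    # composed form
nfd_op = NormalizeUTF8(normalize_form=NormalizeForm.NFD)    # decomposed form
noop_op = NormalizeUTF8(normalize_form=NormalizeForm.NONE)  # leave strings untouched
default_op = NormalizeUTF8()                                # NormalizeForm.NFKC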
@@ -24,6 +24,9 @@ import mindspore._c_dataengine as cde

 from .validators import check_from_file, check_from_list, check_from_dict, check_from_dataset

+__all__ = [
+    "Vocab", "to_str", "to_bytes"
+]

 class Vocab(cde.Vocab):
     """
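
A brief, illustrative sketch of the three names the new __all__ exposes. It is not taken from the commit: Vocab.from_list is inferred from the check_from_list validator imported above, and the numpy-based to_str/to_bytes conversions are assumptions about their signatures.

# Illustrative only: Vocab.from_list is inferred from the check_from_list
# validator; to_bytes/to_str are assumed to convert between str and bytes
# numpy arrays.
import numpy as np
from mindspore.dataset.text import Vocab, to_str, to_bytes

vocab = Vocab.from_list(["<unk>", "hello", "world"])

encoded = to_bytes(np.array(["hello", "world"]))   # str ndarray -> bytes ndarray
decoded = to_str(encoded)                          # bytes ndarray -> str ndarray
print(decoded)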