!2716 del JiebaMode and NormalizeForm from python api doc

Merge pull request !2716 from qianlong21st/fix_python_api
This commit is contained in:
mindspore-ci-bot 2020-06-29 19:14:51 +08:00 committed by Gitee
commit cf4d317d3e
3 changed files with 21 additions and 10 deletions

View File

@ -24,7 +24,7 @@ from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm
__all__ = [
"Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
"to_str", "to_bytes", "JiebaMode", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
"to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
"PythonTokenizer"
]
@ -33,4 +33,4 @@ if platform.system().lower() != 'windows':
RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer
__all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
"RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer", "NormalizeForm"])
"RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])

View File

@ -119,10 +119,12 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
the dictionary can be obtained on the official website of cppjieba.
mp_path (str): the dictionary file is used by MPSegment algorithm,
the dictionary can be obtained on the official website of cppjieba.
mode (JiebaMode, optional): "MP" model will tokenize with MPSegment algorithm,
"HMM" mode will tokenize with Hiddel Markov Model Segment algorithm,
"MIX" model will tokenize with a mix of MPSegment and HMMSegment algorithm
(default="MIX").
mode (JiebaMode, optional): Valid values can be any of [JiebaMode.MP, JiebaMode.HMM,
JiebaMode.MIX] (default=JiebaMode.MIX).
- JiebaMode.MP, tokenize with MPSegment algorithm.
- JiebaMode.HMM, tokenize with Hidden Markov Model Segment algorithm.
- JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
"""
@check_jieba_init
@ -287,10 +289,16 @@ if platform.system().lower() != 'windows':
Apply normalize operation on utf-8 string tensor.
Args:
normalize_form (NormalizeForm, optional): Valid values are "NONE", "NFC", "NFKC", "NFD", "NFKD".
If set "NONE", will do nothing for input string tensor.
If set to any of "NFC", "NFKC", "NFD", "NFKD", will apply normalize operation(default="NFKC").
See http://unicode.org/reports/tr15/ for details.
normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE,
NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD,
NormalizeForm.NFKD] (default=NormalizeForm.NFKC).
See http://unicode.org/reports/tr15/ for details.
- NormalizeForm.NONE, do nothing for input string tensor.
- NormalizeForm.NFC, normalize with Normalization Form C.
- NormalizeForm.NFKC, normalize with Normalization Form KC.
- NormalizeForm.NFD, normalize with Normalization Form D.
- NormalizeForm.NFKD, normalize with Normalization Form KD.
"""
def __init__(self, normalize_form=NormalizeForm.NFKC):

View File

@ -24,6 +24,9 @@ import mindspore._c_dataengine as cde
from .validators import check_from_file, check_from_list, check_from_dict, check_from_dataset
__all__ = [
"Vocab", "to_str", "to_bytes"
]
class Vocab(cde.Vocab):
"""