forked from mindspore-Ecosystem/mindspore
!2716 del JiebaMode and NormalizeForm from python api doc
Merge pull request !2716 from qianlong21st/fix_python_api
commit cf4d317d3e
@@ -24,7 +24,7 @@ from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm
 __all__ = [
     "Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
-    "to_str", "to_bytes", "JiebaMode", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
+    "to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
     "PythonTokenizer"
 ]

@@ -33,4 +33,4 @@ if platform.system().lower() != 'windows':
         RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer

     __all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
-                    "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer", "NormalizeForm"])
+                    "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
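
The practical effect of the two hunks above is that JiebaMode and NormalizeForm disappear from the package's advertised API, while the unchanged "from .utils import ..." context line keeps them reachable as attributes. A minimal sketch of that behaviour (the package path mindspore.dataset.text is an assumption based on the surrounding code, not stated in this diff):

# Illustrative only: the package path mindspore.dataset.text is an assumption.
import mindspore.dataset.text as text

print("JiebaMode" in text.__all__)        # expected: False after this change
print("NormalizeForm" in text.__all__)    # expected: False after this change

# The unchanged "from .utils import ..." line still re-exports the enums,
# so attribute access keeps working:
print(text.JiebaMode.MIX, text.NormalizeForm.NFKC)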
@@ -119,10 +119,12 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
             the dictionary can be obtained on the official website of cppjieba.
         mp_path (str): the dictionary file is used by MPSegment algorithm,
             the dictionary can be obtained on the official website of cppjieba.
-        mode (JiebaMode, optional): "MP" model will tokenize with MPSegment algorithm,
-            "HMM" mode will tokenize with Hiddel Markov Model Segment algorithm,
-            "MIX" model will tokenize with a mix of MPSegment and HMMSegment algorithm
-            (default="MIX").
+        mode (JiebaMode, optional): Valid values can be any of [JiebaMode.MP, JiebaMode.HMM,
+            JiebaMode.MIX] (default=JiebaMode.MIX).
+
+            - JiebaMode.MP, tokenize with MPSegment algorithm.
+            - JiebaMode.HMM, tokenize with Hidden Markov Model Segment algorithm.
+            - JiebaMode.MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
     """

     @check_jieba_init
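
For reference, a small usage sketch of the mode argument the rewritten docstring describes. It is not part of the commit: the dictionary paths are placeholders and the hmm_path keyword name is an assumption (this hunk only shows mp_path).

# Illustrative only, not from the commit: the dictionary paths are placeholders
# and the hmm_path keyword name is an assumption (only mp_path is visible here).
from mindspore.dataset.text import JiebaTokenizer, JiebaMode

HMM_DICT = "/path/to/hmm_model.utf8"   # hypothetical cppjieba HMM dictionary
MP_DICT = "/path/to/jieba.dict.utf8"   # hypothetical cppjieba MP dictionary

mp_only = JiebaTokenizer(hmm_path=HMM_DICT, mp_path=MP_DICT, mode=JiebaMode.MP)
hmm_only = JiebaTokenizer(hmm_path=HMM_DICT, mp_path=MP_DICT, mode=JiebaMode.HMM)
mixed = JiebaTokenizer(hmm_path=HMM_DICT, mp_path=MP_DICT)   # default: JiebaMode.MIX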
@@ -287,10 +289,16 @@ if platform.system().lower() != 'windows':
         Apply normalize operation on utf-8 string tensor.

         Args:
-            normalize_form (NormalizeForm, optional): Valid values are "NONE", "NFC", "NFKC", "NFD", "NFKD".
-                If set "NONE", will do nothing for input string tensor.
-                If set to any of "NFC", "NFKC", "NFD", "NFKD", will apply normalize operation(default="NFKC").
-                See http://unicode.org/reports/tr15/ for details.
+            normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE,
+                NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD,
+                NormalizeForm.NFKD] (default=NormalizeForm.NFKC).
+                See http://unicode.org/reports/tr15/ for details.
+
+                - NormalizeForm.NONE, do nothing for input string tensor.
+                - NormalizeForm.NFC, normalize with Normalization Form C.
+                - NormalizeForm.NFKC, normalize with Normalization Form KC.
+                - NormalizeForm.NFD, normalize with Normalization Form D.
+                - NormalizeForm.NFKD, normalize with Normalization Form KD.
         """

         def __init__(self, normalize_form=NormalizeForm.NFKC):
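
The __init__ signature shown as context at the end of this hunk makes the construction pattern clear. A short sketch of the common forms (the import path is assumed, and the op only exists when the platform is not Windows because of the guard above):

# Constructor usage mirroring the __init__ shown above; import path assumed.
from mindspore.dataset.text import NormalizeUTF8, NormalizeForm

nfc_op = NormalizeUTF8(normalize_form=NormalizeForm.NFC)    # composed form
nfd_op = NormalizeUTF8(normalize_form=NormalizeForm.NFD)    # decomposed form
noop_op = NormalizeUTF8(normalize_form=NormalizeForm.NONE)  # leave strings untouched
default_op = NormalizeUTF8()                                # NormalizeForm.NFKC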
@@ -24,6 +24,9 @@ import mindspore._c_dataengine as cde

 from .validators import check_from_file, check_from_list, check_from_dict, check_from_dataset

+__all__ = [
+    "Vocab", "to_str", "to_bytes"
+]

 class Vocab(cde.Vocab):
     """
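
A brief, illustrative sketch of the three names the new __all__ exposes. It is not taken from the commit: Vocab.from_list is inferred from the check_from_list validator imported above, and the numpy-based to_str/to_bytes conversions are assumptions about their signatures.

# Illustrative only: Vocab.from_list is inferred from the check_from_list
# validator; to_bytes/to_str are assumed to convert between str and bytes
# numpy arrays.
import numpy as np
from mindspore.dataset.text import Vocab, to_str, to_bytes

vocab = Vocab.from_list(["<unk>", "hello", "world"])

encoded = to_bytes(np.array(["hello", "world"]))   # str ndarray -> bytes ndarray
decoded = to_str(encoded)                          # bytes ndarray -> str ndarray
print(decoded)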