forked from mindspore-Ecosystem/mindspore
!14518 【MD】【BUG】Fix PythonTokenizer bug on Windows
From: @xulei2020 Reviewed-by: @leonwanghui,@liucunwei Signed-off-by: @liucunwei
This commit is contained in:
commit
bdebb77219
|
@ -19,7 +19,7 @@ utils provides some general methods for NLP text processing.
|
||||||
"""
|
"""
|
||||||
import platform
|
import platform
|
||||||
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, TruncateSequencePair, \
|
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, TruncateSequencePair, \
|
||||||
ToNumber, SlidingWindow, SentencePieceTokenizer
|
ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer
|
||||||
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
|
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
|
||||||
SPieceTokenizerOutType, SPieceTokenizerLoadType
|
SPieceTokenizerOutType, SPieceTokenizerLoadType
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ __all__ = [
|
||||||
|
|
||||||
if platform.system().lower() != 'windows':
|
if platform.system().lower() != 'windows':
|
||||||
from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \
|
from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \
|
||||||
RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer
|
RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer
|
||||||
|
|
||||||
__all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
|
__all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
|
||||||
"RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
|
"RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
|
||||||
|
|
Loading…
Reference in New Issue