!35949 Add new APIs and fix doc problems

Merge pull request !35949 from xiaotianci/add_api
i-robot 2022-06-21 02:25:54 +00:00 committed by Gitee
commit 5cf4a18d06
GPG Key ID: 173E9B9CA92EEF8F
16 changed files with 887 additions and 792 deletions

View File

@@ -3,30 +3,75 @@ mindspore.dataset.audio
.. include:: dataset_audio/mindspore.dataset.audio.rst
mindspore.dataset.audio.transforms
----------------------------------
Transforms
----------
.. mscnautosummary::
:toctree: dataset_audio
mindspore.dataset.audio.transforms.AllpassBiquad
mindspore.dataset.audio.transforms.AmplitudeToDB
mindspore.dataset.audio.transforms.Angle
mindspore.dataset.audio.transforms.BandBiquad
mindspore.dataset.audio.transforms.BandpassBiquad
mindspore.dataset.audio.transforms.BandrejectBiquad
mindspore.dataset.audio.transforms.BassBiquad
mindspore.dataset.audio.transforms.ComplexNorm
mindspore.dataset.audio.transforms.Contrast
mindspore.dataset.audio.transforms.FrequencyMasking
mindspore.dataset.audio.transforms.LowpassBiquad
mindspore.dataset.audio.transforms.TimeMasking
mindspore.dataset.audio.transforms.TimeStretch
mindspore.dataset.audio.AllpassBiquad
mindspore.dataset.audio.AmplitudeToDB
mindspore.dataset.audio.Angle
mindspore.dataset.audio.BandBiquad
mindspore.dataset.audio.BandpassBiquad
mindspore.dataset.audio.BandrejectBiquad
mindspore.dataset.audio.BassBiquad
mindspore.dataset.audio.Biquad
mindspore.dataset.audio.ComplexNorm
mindspore.dataset.audio.ComputeDeltas
mindspore.dataset.audio.Contrast
mindspore.dataset.audio.DBToAmplitude
mindspore.dataset.audio.DCShift
mindspore.dataset.audio.DeemphBiquad
mindspore.dataset.audio.DetectPitchFrequency
mindspore.dataset.audio.Dither
mindspore.dataset.audio.EqualizerBiquad
mindspore.dataset.audio.Fade
mindspore.dataset.audio.Flanger
mindspore.dataset.audio.FrequencyMasking
mindspore.dataset.audio.Gain
mindspore.dataset.audio.GriffinLim
mindspore.dataset.audio.HighpassBiquad
mindspore.dataset.audio.InverseMelScale
mindspore.dataset.audio.LFilter
mindspore.dataset.audio.LowpassBiquad
mindspore.dataset.audio.Magphase
mindspore.dataset.audio.MaskAlongAxis
mindspore.dataset.audio.MaskAlongAxisIID
mindspore.dataset.audio.MelScale
mindspore.dataset.audio.MuLawDecoding
mindspore.dataset.audio.MuLawEncoding
mindspore.dataset.audio.Overdrive
mindspore.dataset.audio.Phaser
mindspore.dataset.audio.PhaseVocoder
mindspore.dataset.audio.Resample
mindspore.dataset.audio.RiaaBiquad
mindspore.dataset.audio.SlidingWindowCmn
mindspore.dataset.audio.SpectralCentroid
mindspore.dataset.audio.Spectrogram
mindspore.dataset.audio.TimeMasking
mindspore.dataset.audio.TimeStretch
mindspore.dataset.audio.TrebleBiquad
mindspore.dataset.audio.Vad
mindspore.dataset.audio.Vol
mindspore.dataset.audio.utils
-----------------------------
Utilities
---------
.. mscnautosummary::
:toctree: dataset_audio
mindspore.dataset.audio.utils.ScaleType
mindspore.dataset.audio.BorderType
mindspore.dataset.audio.DensityFunction
mindspore.dataset.audio.FadeShape
mindspore.dataset.audio.GainType
mindspore.dataset.audio.Interpolation
mindspore.dataset.audio.MelType
mindspore.dataset.audio.Modulation
mindspore.dataset.audio.NormMode
mindspore.dataset.audio.NormType
mindspore.dataset.audio.ResampleMethod
mindspore.dataset.audio.ScaleType
mindspore.dataset.audio.WindowType
mindspore.dataset.audio.create_dct
mindspore.dataset.audio.melscale_fbanks
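For readers skimming the new index, a minimal sketch of driving one of the newly documented flat-path transforms through a pipeline (the waveform shape and gain_db value here are placeholder assumptions):

import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.audio as audio

# One hypothetical 16000-sample clip; NumpySlicesDataset slices along dim 0.
waveform = np.random.randn(1, 16000).astype(np.float32)
dataset = ds.NumpySlicesDataset(data=[waveform], column_names=["audio"], shuffle=False)
# Transforms are now documented under the flat path, e.g. audio.Gain.
dataset = dataset.map(operations=audio.Gain(gain_db=5.0), input_columns=["audio"])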

View File

@@ -19,48 +19,54 @@ mindspore.dataset.text
- TensorOperation, the base class of all data processing operations implemented in C++.
- TextTensorOperation, the base class of all text processing operations. It is a derived class of TensorOperation.
mindspore.dataset.text.transforms
---------------------------------
Transforms
----------
.. mscnnoteautosummary::
:toctree: dataset_text
:nosignatures:
:template: classtemplate.rst
mindspore.dataset.text.transforms.BasicTokenizer
mindspore.dataset.text.transforms.BertTokenizer
mindspore.dataset.text.transforms.CaseFold
mindspore.dataset.text.transforms.JiebaTokenizer
mindspore.dataset.text.transforms.Lookup
mindspore.dataset.text.transforms.Ngram
mindspore.dataset.text.transforms.NormalizeUTF8
mindspore.dataset.text.transforms.PythonTokenizer
mindspore.dataset.text.transforms.RegexReplace
mindspore.dataset.text.transforms.RegexTokenizer
mindspore.dataset.text.transforms.SentencePieceTokenizer
mindspore.dataset.text.transforms.SlidingWindow
mindspore.dataset.text.transforms.ToNumber
mindspore.dataset.text.transforms.TruncateSequencePair
mindspore.dataset.text.transforms.UnicodeCharTokenizer
mindspore.dataset.text.transforms.UnicodeScriptTokenizer
mindspore.dataset.text.transforms.WhitespaceTokenizer
mindspore.dataset.text.transforms.WordpieceTokenizer
mindspore.dataset.text.BasicTokenizer
mindspore.dataset.text.BertTokenizer
mindspore.dataset.text.CaseFold
mindspore.dataset.text.FilterWikipediaXML
mindspore.dataset.text.JiebaTokenizer
mindspore.dataset.text.Lookup
mindspore.dataset.text.Ngram
mindspore.dataset.text.NormalizeUTF8
mindspore.dataset.text.PythonTokenizer
mindspore.dataset.text.RegexReplace
mindspore.dataset.text.RegexTokenizer
mindspore.dataset.text.SentencePieceTokenizer
mindspore.dataset.text.SlidingWindow
mindspore.dataset.text.ToNumber
mindspore.dataset.text.ToVectors
mindspore.dataset.text.TruncateSequencePair
mindspore.dataset.text.UnicodeCharTokenizer
mindspore.dataset.text.UnicodeScriptTokenizer
mindspore.dataset.text.WhitespaceTokenizer
mindspore.dataset.text.WordpieceTokenizer
mindspore.dataset.text.utils
----------------------------
Utilities
---------
.. mscnnoteautosummary::
:toctree: dataset_text
:nosignatures:
:template: classtemplate.rst
mindspore.dataset.text.CharNGram
mindspore.dataset.text.FastText
mindspore.dataset.text.GloVe
mindspore.dataset.text.JiebaMode
mindspore.dataset.text.NormalizeForm
mindspore.dataset.text.SentencePieceModel
mindspore.dataset.text.SentencePieceVocab
mindspore.dataset.text.SPieceTokenizerLoadType
mindspore.dataset.text.SPieceTokenizerOutType
mindspore.dataset.text.to_str
mindspore.dataset.text.to_bytes
mindspore.dataset.text.Vectors
mindspore.dataset.text.Vocab
mindspore.dataset.text.to_bytes
mindspore.dataset.text.to_str

View File

@@ -44,8 +44,8 @@ Transforms
mindspore.dataset.transforms.TypeCast
mindspore.dataset.transforms.Unique
Others
------
Utilities
---------
.. mscnautosummary::
:toctree: dataset_transforms

View File

@@ -98,8 +98,8 @@ Transforms
mindspore.dataset.vision.UniformAugment
mindspore.dataset.vision.VerticalFlip
Others
------
Utilities
---------
.. mscnautosummary::
:toctree: dataset_vision
@@ -112,3 +112,5 @@ Others
mindspore.dataset.vision.ImageBatchFormat
mindspore.dataset.vision.Inter
mindspore.dataset.vision.SliceMode
mindspore.dataset.vision.get_image_num_channels
mindspore.dataset.vision.get_image_size
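A hedged sketch of the two utility functions newly added to this index, assuming they accept an HWC NumPy image:

import numpy as np
import mindspore.dataset.vision as vision

img = np.zeros((32, 48, 3), dtype=np.uint8)  # height 32, width 48, 3 channels
print(vision.get_image_size(img))            # expected [32, 48]
print(vision.get_image_num_channels(img))    # expected 3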

View File

@@ -3,35 +3,80 @@ mindspore.dataset.audio
.. automodule:: mindspore.dataset.audio
mindspore.dataset.audio.transforms
----------------------------------
Transforms
----------
.. autosummary::
:toctree: dataset_audio
:nosignatures:
:template: classtemplate.rst
mindspore.dataset.audio.transforms.AllpassBiquad
mindspore.dataset.audio.transforms.AmplitudeToDB
mindspore.dataset.audio.transforms.Angle
mindspore.dataset.audio.transforms.BandBiquad
mindspore.dataset.audio.transforms.BandpassBiquad
mindspore.dataset.audio.transforms.BandrejectBiquad
mindspore.dataset.audio.transforms.BassBiquad
mindspore.dataset.audio.transforms.ComplexNorm
mindspore.dataset.audio.transforms.Contrast
mindspore.dataset.audio.transforms.FrequencyMasking
mindspore.dataset.audio.transforms.LowpassBiquad
mindspore.dataset.audio.transforms.TimeMasking
mindspore.dataset.audio.transforms.TimeStretch
mindspore.dataset.audio.AllpassBiquad
mindspore.dataset.audio.AmplitudeToDB
mindspore.dataset.audio.Angle
mindspore.dataset.audio.BandBiquad
mindspore.dataset.audio.BandpassBiquad
mindspore.dataset.audio.BandrejectBiquad
mindspore.dataset.audio.BassBiquad
mindspore.dataset.audio.Biquad
mindspore.dataset.audio.ComplexNorm
mindspore.dataset.audio.ComputeDeltas
mindspore.dataset.audio.Contrast
mindspore.dataset.audio.DBToAmplitude
mindspore.dataset.audio.DCShift
mindspore.dataset.audio.DeemphBiquad
mindspore.dataset.audio.DetectPitchFrequency
mindspore.dataset.audio.Dither
mindspore.dataset.audio.EqualizerBiquad
mindspore.dataset.audio.Fade
mindspore.dataset.audio.Flanger
mindspore.dataset.audio.FrequencyMasking
mindspore.dataset.audio.Gain
mindspore.dataset.audio.GriffinLim
mindspore.dataset.audio.HighpassBiquad
mindspore.dataset.audio.InverseMelScale
mindspore.dataset.audio.LFilter
mindspore.dataset.audio.LowpassBiquad
mindspore.dataset.audio.Magphase
mindspore.dataset.audio.MaskAlongAxis
mindspore.dataset.audio.MaskAlongAxisIID
mindspore.dataset.audio.MelScale
mindspore.dataset.audio.MuLawDecoding
mindspore.dataset.audio.MuLawEncoding
mindspore.dataset.audio.Overdrive
mindspore.dataset.audio.Phaser
mindspore.dataset.audio.PhaseVocoder
mindspore.dataset.audio.Resample
mindspore.dataset.audio.RiaaBiquad
mindspore.dataset.audio.SlidingWindowCmn
mindspore.dataset.audio.SpectralCentroid
mindspore.dataset.audio.Spectrogram
mindspore.dataset.audio.TimeMasking
mindspore.dataset.audio.TimeStretch
mindspore.dataset.audio.TrebleBiquad
mindspore.dataset.audio.Vad
mindspore.dataset.audio.Vol
mindspore.dataset.audio.utils
-----------------------------
Utilities
---------
.. autosummary::
:toctree: dataset_audio
:nosignatures:
:template: classtemplate.rst
mindspore.dataset.audio.utils.ScaleType
mindspore.dataset.audio.BorderType
mindspore.dataset.audio.DensityFunction
mindspore.dataset.audio.FadeShape
mindspore.dataset.audio.GainType
mindspore.dataset.audio.Interpolation
mindspore.dataset.audio.MelType
mindspore.dataset.audio.Modulation
mindspore.dataset.audio.NormMode
mindspore.dataset.audio.NormType
mindspore.dataset.audio.ResampleMethod
mindspore.dataset.audio.ScaleType
mindspore.dataset.audio.WindowType
mindspore.dataset.audio.create_dct
mindspore.dataset.audio.melscale_fbanks

View File

@@ -3,48 +3,54 @@ mindspore.dataset.text
.. automodule:: mindspore.dataset.text
mindspore.dataset.text.transforms
---------------------------------
Transforms
----------
.. msnoteautosummary::
:toctree: dataset_text
:nosignatures:
:template: classtemplate.rst
mindspore.dataset.text.transforms.BasicTokenizer
mindspore.dataset.text.transforms.BertTokenizer
mindspore.dataset.text.transforms.CaseFold
mindspore.dataset.text.transforms.JiebaTokenizer
mindspore.dataset.text.transforms.Lookup
mindspore.dataset.text.transforms.Ngram
mindspore.dataset.text.transforms.NormalizeUTF8
mindspore.dataset.text.transforms.PythonTokenizer
mindspore.dataset.text.transforms.RegexReplace
mindspore.dataset.text.transforms.RegexTokenizer
mindspore.dataset.text.transforms.SentencePieceTokenizer
mindspore.dataset.text.transforms.SlidingWindow
mindspore.dataset.text.transforms.ToNumber
mindspore.dataset.text.transforms.TruncateSequencePair
mindspore.dataset.text.transforms.UnicodeCharTokenizer
mindspore.dataset.text.transforms.UnicodeScriptTokenizer
mindspore.dataset.text.transforms.WhitespaceTokenizer
mindspore.dataset.text.transforms.WordpieceTokenizer
mindspore.dataset.text.BasicTokenizer
mindspore.dataset.text.BertTokenizer
mindspore.dataset.text.CaseFold
mindspore.dataset.text.FilterWikipediaXML
mindspore.dataset.text.JiebaTokenizer
mindspore.dataset.text.Lookup
mindspore.dataset.text.Ngram
mindspore.dataset.text.NormalizeUTF8
mindspore.dataset.text.PythonTokenizer
mindspore.dataset.text.RegexReplace
mindspore.dataset.text.RegexTokenizer
mindspore.dataset.text.SentencePieceTokenizer
mindspore.dataset.text.SlidingWindow
mindspore.dataset.text.ToNumber
mindspore.dataset.text.ToVectors
mindspore.dataset.text.TruncateSequencePair
mindspore.dataset.text.UnicodeCharTokenizer
mindspore.dataset.text.UnicodeScriptTokenizer
mindspore.dataset.text.WhitespaceTokenizer
mindspore.dataset.text.WordpieceTokenizer
mindspore.dataset.text.utils
----------------------------
Utilities
---------
.. msnoteautosummary::
:toctree: dataset_text
:nosignatures:
:template: classtemplate.rst
mindspore.dataset.text.CharNGram
mindspore.dataset.text.FastText
mindspore.dataset.text.GloVe
mindspore.dataset.text.JiebaMode
mindspore.dataset.text.NormalizeForm
mindspore.dataset.text.SentencePieceModel
mindspore.dataset.text.SentencePieceVocab
mindspore.dataset.text.SPieceTokenizerLoadType
mindspore.dataset.text.SPieceTokenizerOutType
mindspore.dataset.text.to_str
mindspore.dataset.text.to_bytes
mindspore.dataset.text.Vectors
mindspore.dataset.text.Vocab
mindspore.dataset.text.to_bytes
mindspore.dataset.text.to_str

View File

@@ -25,8 +25,8 @@ Transforms
mindspore.dataset.transforms.TypeCast
mindspore.dataset.transforms.Unique
Others
------
Utilities
---------
.. autosummary::
:toctree: dataset_transforms

View File

@@ -77,8 +77,8 @@ Transforms
mindspore.dataset.vision.UniformAugment
mindspore.dataset.vision.VerticalFlip
Others
------
Utilities
---------
.. autosummary::
:toctree: dataset_vision
@@ -91,3 +91,5 @@ Others
mindspore.dataset.vision.ImageBatchFormat
mindspore.dataset.vision.Inter
mindspore.dataset.vision.SliceMode
mindspore.dataset.vision.get_image_num_channels
mindspore.dataset.vision.get_image_size

View File

@@ -37,12 +37,13 @@ Descriptions of common data processing terms are as follows:
- TensorOperation, the base class of all data processing operations implemented in C++.
- AudioTensorOperation, the base class of all audio processing operations. It is a derived class of TensorOperation.
"""
from . import transforms
from . import utils
from .transforms import AllpassBiquad, AmplitudeToDB, Angle, BandBiquad, BandpassBiquad, BandrejectBiquad, BassBiquad, \
Biquad, ComplexNorm, ComputeDeltas, Contrast, DBToAmplitude, DCShift, DeemphBiquad, DetectPitchFrequency, Dither, \
EqualizerBiquad, Fade, Flanger, FrequencyMasking, Gain, GriffinLim, HighpassBiquad, InverseMelScale, LFilter, \
LowpassBiquad, Magphase, MaskAlongAxis, MaskAlongAxisIID, MelScale, MuLawDecoding, MuLawEncoding, Overdrive, \
Phaser, PhaseVocoder, Resample, RiaaBiquad, SlidingWindowCmn, SpectralCentroid, Spectrogram, TimeMasking, \
TimeStretch, TrebleBiquad, Vol
from . import transforms
from .utils import create_dct, melscale_fbanks, BorderType, DensityFunction, FadeShape, GainType, Interpolation, \
MelType, Modulation, NormMode, NormType, ResampleMethod, ScaleType, WindowType
TimeStretch, TrebleBiquad, Vad, Vol
from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormMode, \
NormType, ResampleMethod, ScaleType, WindowType, create_dct, melscale_fbanks
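With the exports above, the newly added Vad and the enum types resolve from the flat module path; a minimal sketch with placeholder parameter values:

import mindspore.dataset.audio as audio

# Newly exported voice-activity-detection transform.
vad = audio.Vad(sample_rate=16000)
# Enums such as FadeShape now sit next to the transforms that consume them.
fade = audio.Fade(fade_in_len=100, fade_out_len=100, fade_shape=audio.FadeShape.LINEAR)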

View File

@@ -30,22 +30,14 @@ Descriptions of common data processing terms are as follows:
- TextTensorOperation, the base class of all text processing operations. It is a derived class of TensorOperation.
"""
import platform
from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \
TruncateSequencePair, ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer, ToVectors
from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors, FastText, GloVe, CharNGram
__all__ = [
"Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
"to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
"PythonTokenizer", "SlidingWindow", "SentencePieceVocab", "SentencePieceTokenizer", "SPieceTokenizerOutType",
"SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors", "FastText",
"GloVe", "CharNGram"
]
from . import transforms
from . import utils
from .transforms import JiebaTokenizer, Lookup, Ngram, PythonTokenizer, SentencePieceTokenizer, SlidingWindow, \
ToNumber, ToVectors, TruncateSequencePair, UnicodeCharTokenizer, WordpieceTokenizer
from .utils import CharNGram, FastText, GloVe, JiebaMode, NormalizeForm, SentencePieceModel, SentencePieceVocab, \
SPieceTokenizerLoadType, SPieceTokenizerOutType, Vectors, Vocab, to_bytes, to_str
if platform.system().lower() != 'windows':
from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \
RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer
__all__.extend(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
"RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
from .transforms import BasicTokenizer, BertTokenizer, CaseFold, FilterWikipediaXML, NormalizeUTF8, RegexReplace, \
RegexTokenizer, UnicodeScriptTokenizer, WhitespaceTokenizer
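Since the ICU-backed tokenizers are only imported off Windows, portable code can guard on the platform; a small sketch:

import platform
import mindspore.dataset.text as text

lookup = text.Lookup(text.Vocab.from_list(["hello", "world"]))  # available everywhere
if platform.system().lower() != 'windows':
    tokenizer = text.WhitespaceTokenizer()  # one of the conditionally imported transforms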

View File

@@ -362,6 +362,43 @@ class Ngram(TextTensorOperation):
return cde.NgramOperation(self.ngrams, self.left_pad, self.right_pad, self.separator)
class PythonTokenizer:
"""
Class that applies a user-defined string tokenizer to the input string.
Args:
tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens.
Raises:
TypeError: If `tokenizer` is not a callable Python function.
Supported Platforms:
``CPU``
Examples:
>>> def my_tokenizer(line):
... return line.split()
>>> text_file_dataset = text_file_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
"""
@check_python_tokenizer
def __init__(self, tokenizer):
self.pyfunc = tokenizer
self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)')
self.random = False
def __call__(self, in_array):
if not isinstance(in_array, np.ndarray):
raise TypeError("input should be a NumPy array. Got {}.".format(type(in_array)))
if in_array.dtype.type is np.bytes_:
in_array = to_str(in_array)
try:
tokens = self.tokenizer(in_array)
except Exception as e:
raise RuntimeError("Error occurred in Pyfunc [" + str(self.pyfunc.__name__) + "], error message: " + str(e))
return tokens
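Besides the map-based usage shown in the docstring, the __call__ path above also supports eager invocation on a NumPy string array; a small sketch:

import numpy as np
import mindspore.dataset.text as text

def my_tokenizer(line):
    return line.split()

op = text.PythonTokenizer(my_tokenizer)
# A 0-d string array goes in; the vectorized tokenizer returns a 1-D token array.
print(op(np.array("hello world")))  # expected ['hello' 'world']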
class SentencePieceTokenizer(TextTensorOperation):
"""
Tokenize scalar token or 1-D tokens to tokens by sentencepiece.
@@ -653,43 +690,6 @@ class WordpieceTokenizer(TextTensorOperation):
self.unknown_token, self.with_offsets)
class PythonTokenizer:
"""
Class that applies a user-defined string tokenizer to the input string.
Args:
tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens.
Raises:
TypeError: If `tokenizer` is not a callable Python function.
Supported Platforms:
``CPU``
Examples:
>>> def my_tokenizer(line):
... return line.split()
>>> text_file_dataset = text_file_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
"""
@check_python_tokenizer
def __init__(self, tokenizer):
self.pyfunc = tokenizer
self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)')
self.random = False
def __call__(self, in_array):
if not isinstance(in_array, np.ndarray):
raise TypeError("input should be a NumPy array. Got {}.".format(type(in_array)))
if in_array.dtype.type is np.bytes_:
in_array = to_str(in_array)
try:
tokens = self.tokenizer(in_array)
except Exception as e:
raise RuntimeError("Error occurred in Pyfunc [" + str(self.pyfunc.__name__) + "], error message: " + str(e))
return tokens
if platform.system().lower() != 'windows':
DE_C_INTER_NORMALIZE_FORM = {
NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,

View File

@@ -26,9 +26,316 @@ from .validators import check_vocab, check_from_file, check_from_list, check_fro
check_from_dataset_sentencepiece, check_from_file_sentencepiece, check_save_model, \
check_from_file_vectors, check_tokens_to_ids, check_ids_to_tokens
__all__ = [
"Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors", "FastText", "GloVe", "CharNGram"
]
class CharNGram(cde.CharNGram):
"""
CharNGram object that is used to map tokens into pre-trained vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a CharNGram vector from a file.
Args:
file_path (str): Path of the file that contains the CharNGram vectors.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class FastText(cde.FastText):
"""
FastText object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a FastText vector from a file.
Args:
file_path (str): Path of the file that contains the vectors. The suffix of pre-trained vector sets
must be `*.vec`.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class GloVe(cde.GloVe):
"""
GloVe object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a GloVe vector from a file.
Args:
file_path (str): Path of the file that contains the vectors. The format of pre-trained vector sets
must be `glove.6B.*.txt`.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class JiebaMode(IntEnum):
"""
An enumeration for JiebaTokenizer.
Possible enumeration values are: JiebaMode.MIX, JiebaMode.MP, JiebaMode.HMM.
- JiebaMode.MIX: tokenize with a mix of MPSegment and HMMSegment algorithms.
- JiebaMode.MP: tokenize with MPSegment algorithm.
- JiebaMode.HMM: tokenize with Hidden Markov Model Segment algorithm.
"""
MIX = 0
MP = 1
HMM = 2
class NormalizeForm(IntEnum):
"""
Enumeration class for `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
Possible enumeration values are: NormalizeForm.NONE, NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD
and NormalizeForm.NFKD.
- NormalizeForm.NONE: no normalization.
- NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
- NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
- NormalizeForm.NFD: Canonical Decomposition.
- NormalizeForm.NFKD: Compatibility Decomposition.
"""
NONE = 0
NFC = 1
NFKC = 2
NFD = 3
NFKD = 4
class SentencePieceModel(IntEnum):
"""
An enumeration for SentencePieceModel.
Possible enumeration values are: SentencePieceModel.UNIGRAM, SentencePieceModel.BPE, SentencePieceModel.CHAR,
SentencePieceModel.WORD.
- SentencePieceModel.UNIGRAM: Unigram Language Model means the next word in the sentence is assumed to be
independent of the previous words generated by the model.
- SentencePieceModel.BPE: refers to the byte pair encoding algorithm, which replaces the most frequent pair of bytes in
a sentence with a single, unused byte.
- SentencePieceModel.CHAR: refers to the character-based SentencePiece model type.
- SentencePieceModel.WORD: refers to the word-based SentencePiece model type.
"""
UNIGRAM = 0
BPE = 1
CHAR = 2
WORD = 3
DE_C_INTER_SENTENCEPIECE_MODE = {
SentencePieceModel.UNIGRAM: cde.SentencePieceModel.DE_SENTENCE_PIECE_UNIGRAM,
SentencePieceModel.BPE: cde.SentencePieceModel.DE_SENTENCE_PIECE_BPE,
SentencePieceModel.CHAR: cde.SentencePieceModel.DE_SENTENCE_PIECE_CHAR,
SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
}
class SentencePieceVocab:
"""
SentencePiece object that is used to do words segmentation.
"""
def __init__(self):
self.c_sentence_piece_vocab = None
@classmethod
@check_from_dataset_sentencepiece
def from_dataset(cls, dataset, col_names, vocab_size, character_coverage, model_type, params):
"""
Build a SentencePiece from a dataset.
Args:
dataset (Dataset): Dataset to build SentencePiece.
col_names (list): The list of column names.
vocab_size (int): Vocabulary size.
character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
character set.
model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
sentence must be pre-tokenized when using SentencePieceModel.WORD type.
- SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
be independent of the previous words generated by the model.
- SentencePieceModel.BPE, refers to the byte pair encoding algorithm, which replaces the most frequent pair
of bytes in a sentence with a single, unused byte.
- SentencePieceModel.CHAR, refers to the character-based SentencePiece model type.
- SentencePieceModel.WORD, refers to the word-based SentencePiece model type.
params (dict): A dictionary with no incoming parameters.
Returns:
SentencePieceVocab, vocab built from the dataset.
Examples:
>>> from mindspore.dataset.text import SentencePieceModel
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
>>> vocab = text.SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
"""
sentence_piece_vocab = cls()
sentence_piece_vocab.c_sentence_piece_vocab = dataset.build_sentencepiece_vocab(col_names, vocab_size,
character_coverage,
model_type, params)
return sentence_piece_vocab
@classmethod
@check_from_file_sentencepiece
def from_file(cls, file_path, vocab_size, character_coverage, model_type, params):
"""
Build a SentencePiece object from a file.
Args:
file_path (list): Path to the file which contains the SentencePiece list.
vocab_size (int): Vocabulary size.
character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
character set.
model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
sentence must be pre-tokenized when using SentencePieceModel.WORD type.
- SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
be independent of the previous words generated by the model.
- SentencePieceModel.BPE, refers to the byte pair encoding algorithm, which replaces the most frequent pair
of bytes in a sentence with a single, unused byte.
- SentencePieceModel.CHAR, refers to the character-based SentencePiece model type.
- SentencePieceModel.WORD, refers to the word-based SentencePiece model type.
params (dict): A dictionary with no incoming parameters (the parameters are derived from the SentencePiece
library).
Returns:
SentencePieceVocab, vocab built from the file.
Examples:
>>> from mindspore.dataset.text import SentencePieceModel
>>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
"""
sentence_piece_vocab = cls()
sentence_piece_vocab.c_sentence_piece_vocab = cde.SentencePieceVocab.from_file(
file_path, vocab_size, character_coverage, DE_C_INTER_SENTENCEPIECE_MODE.get(model_type), params)
return sentence_piece_vocab
@classmethod
@check_save_model
def save_model(cls, vocab, path, filename):
"""
Save the model to the given file path.
Args:
vocab (SentencePieceVocab): A SentencePiece object.
path (str): Path to store model.
filename (str): The name of the file.
Examples:
>>> from mindspore.dataset.text import SentencePieceModel
>>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
>>> text.SentencePieceVocab.save_model(vocab, "./", "m.model")
"""
cde.SentencePieceVocab.save_model(vocab.c_sentence_piece_vocab, path, filename)
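A vocab built or loaded this way is typically handed to SentencePieceTokenizer; a hedged sketch with a placeholder file path:

import mindspore.dataset.text as text
from mindspore.dataset.text import SentencePieceModel, SPieceTokenizerOutType

vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
                                          SentencePieceModel.UNIGRAM, {})
tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING)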
class SPieceTokenizerLoadType(IntEnum):
"""
An enumeration for loading type of SentencePieceTokenizer.
Possible enumeration values are: SPieceTokenizerLoadType.FILE, SPieceTokenizerLoadType.MODEL.
- SPieceTokenizerLoadType.FILE: Load SentencePiece tokenizer from a Vocab file.
- SPieceTokenizerLoadType.MODEL: Load SentencePiece tokenizer from a SentencePieceVocab object.
"""
FILE = 0
MODEL = 1
class SPieceTokenizerOutType(IntEnum):
"""
An enumeration for SPieceTokenizerOutType.
Possible enumeration values are: SPieceTokenizerOutType.STRING, SPieceTokenizerOutType.INT.
- SPieceTokenizerOutType.STRING: means the output type of the SentencePiece tokenizer is string.
- SPieceTokenizerOutType.INT: means the output type of the SentencePiece tokenizer is int.
"""
STRING = 0
INT = 1
class Vectors(cde.Vectors):
"""
Vectors object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a vector from a file.
Args:
file_path (str): Path of the file that contains the vectors.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
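Vectors objects (including the FastText, GloVe and CharNGram subclasses above) are consumed by the ToVectors transform; a sketch with a placeholder path:

import mindspore.dataset.text as text

vectors = text.Vectors.from_file("/path/to/vectors/file")
# Maps each input token to its pre-trained vector.
to_vectors = text.ToVectors(vectors)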
class Vocab:
@@ -41,66 +348,6 @@ class Vocab:
def __init__(self):
self.c_vocab = None
def vocab(self):
"""
Get the vocabulary table in dict type.
Returns:
A vocabulary consisting of word and id pairs.
Examples:
>>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
>>> vocabulary_dict = vocab.vocab()
"""
check_vocab(self.c_vocab)
return self.c_vocab.vocab()
@check_tokens_to_ids
def tokens_to_ids(self, tokens):
"""
Converts a token string or a sequence of tokens to a single integer id or a sequence of ids.
If token does not exist, return id with value -1.
Args:
tokens (Union[str, list[str]]): One or several token(s) to convert to token id(s).
Returns:
The token id or list of token ids.
Examples:
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
>>> ids = vocab.tokens_to_ids(["w1", "w3"])
"""
check_vocab(self.c_vocab)
if isinstance(tokens, np.ndarray):
tokens = tokens.tolist()
if isinstance(tokens, str):
tokens = [tokens]
return self.c_vocab.tokens_to_ids(tokens)
@check_ids_to_tokens
def ids_to_tokens(self, ids):
"""
Converts a single index or a sequence of indices to a token or a sequence of tokens.
If id does not exist, return empty string.
Args:
ids (Union[int, list[int]]): The token id (or token ids) to convert to tokens.
Returns:
The decoded token(s).
Examples:
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
>>> token = vocab.ids_to_tokens(0)
"""
check_vocab(self.c_vocab)
if isinstance(ids, np.ndarray):
ids = ids.tolist()
if isinstance(ids, int):
ids = [ids]
return self.c_vocab.ids_to_tokens(ids)
@classmethod
@check_from_dataset
def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, special_first=True):
@@ -236,141 +483,65 @@ class Vocab:
vocab.c_vocab = cde.Vocab.from_dict(word_dict)
return vocab
class SentencePieceVocab:
"""
SentencePiece object that is used to do words segmentation.
"""
def __init__(self):
self.c_sentence_piece_vocab = None
@classmethod
@check_from_dataset_sentencepiece
def from_dataset(cls, dataset, col_names, vocab_size, character_coverage, model_type, params):
def vocab(self):
"""
Build a SentencePiece from a dataset.
Args:
dataset (Dataset): Dataset to build SentencePiece.
col_names (list): The list of column names.
vocab_size (int): Vocabulary size.
character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
character set.
model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
sentence must be pre-tokenized when using SentencePieceModel.WORD type.
- SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
be independent of the previous words generated by the model.
- SentencePieceModel.BPE, refers to the byte pair encoding algorithm, which replaces the most frequent pair
of bytes in a sentence with a single, unused byte.
- SentencePieceModel.CHAR, refers to the character-based SentencePiece model type.
- SentencePieceModel.WORD, refers to the word-based SentencePiece model type.
params (dict): A dictionary with no incoming parameters.
Get the vocabulary table in dict type.
Returns:
SentencePieceVocab, vocab built from the dataset.
A vocabulary consisting of word and id pairs.
Examples:
>>> from mindspore.dataset.text import SentencePieceModel
>>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
>>> vocab = text.SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
>>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
>>> vocabulary_dict = vocab.vocab()
"""
check_vocab(self.c_vocab)
return self.c_vocab.vocab()
sentence_piece_vocab = cls()
sentence_piece_vocab.c_sentence_piece_vocab = dataset.build_sentencepiece_vocab(col_names, vocab_size,
character_coverage,
model_type, params)
return sentence_piece_vocab
@classmethod
@check_from_file_sentencepiece
def from_file(cls, file_path, vocab_size, character_coverage, model_type, params):
@check_tokens_to_ids
def tokens_to_ids(self, tokens):
"""
Build a SentencePiece object from a file.
Converts a token string or a sequence of tokens to a single integer id or a sequence of ids.
If token does not exist, return id with value -1.
Args:
file_path (list): Path to the file which contains the SentencePiece list.
vocab_size (int): Vocabulary size.
character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
character set.
model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
sentence must be pre-tokenized when using SentencePieceModel.WORD type.
- SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
be independent of the previous words generated by the model.
- SentencePieceModel.BPE, refers to the byte pair encoding algorithm, which replaces the most frequent pair
of bytes in a sentence with a single, unused byte.
- SentencePieceModel.CHAR, refers to the character-based SentencePiece model type.
- SentencePieceModel.WORD, refers to the word-based SentencePiece model type.
params (dict): A dictionary with no incoming parameters (the parameters are derived from the SentencePiece
library).
tokens (Union[str, list[str]]): One or several token(s) to convert to token id(s).
Returns:
SentencePieceVocab, vocab built from the file.
The token id or list of token ids.
Examples:
>>> from mindspore.dataset.text import SentencePieceModel
>>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
>>> ids = vocab.tokens_to_ids(["w1", "w3"])
"""
check_vocab(self.c_vocab)
if isinstance(tokens, np.ndarray):
tokens = tokens.tolist()
if isinstance(tokens, str):
tokens = [tokens]
return self.c_vocab.tokens_to_ids(tokens)
sentence_piece_vocab = cls()
sentence_piece_vocab.c_sentence_piece_vocab = \
cde.SentencePieceVocab.from_file(file_path, vocab_size, character_coverage,
DE_C_INTER_SENTENCEPIECE_MODE[model_type], params)
return sentence_piece_vocab
@classmethod
@check_save_model
def save_model(cls, vocab, path, filename):
@check_ids_to_tokens
def ids_to_tokens(self, ids):
"""
Save the model to the given file path.
Converts a single index or a sequence of indices to a token or a sequence of tokens.
If id does not exist, return empty string.
Args:
vocab (SentencePieceVocab): A SentencePiece object.
path (str): Path to store model.
filename (str): The name of the file.
ids (Union[int, list[int]]): The token id (or token ids) to convert to tokens.
Returns:
The decoded token(s).
Examples:
>>> from mindspore.dataset.text import SentencePieceModel
>>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
... SentencePieceModel.UNIGRAM, {})
>>> text.SentencePieceVocab.save_model(vocab, "./", "m.model")
>>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
>>> token = vocab.ids_to_tokens(0)
"""
cde.SentencePieceVocab.save_model(vocab.c_sentence_piece_vocab, path, filename)
def to_str(array, encoding='utf8'):
"""
Convert NumPy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.
Args:
array (numpy.ndarray): Array of `bytes` type representing strings.
encoding (str): Indicating the charset for decoding (default='utf8').
Returns:
numpy.ndarray, NumPy array of `str`.
Examples:
>>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"]
>>> dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir, shuffle=False)
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
... data = text.to_str(item["text"])
"""
if not isinstance(array, np.ndarray):
raise TypeError('input should be a NumPy array.')
return np.char.decode(array, encoding)
check_vocab(self.c_vocab)
if isinstance(ids, np.ndarray):
ids = ids.tolist()
if isinstance(ids, int):
ids = [ids]
return self.c_vocab.ids_to_tokens(ids)
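Taken together, the two converters above round-trip between tokens and ids; a small sketch:

import mindspore.dataset.text as text

vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
ids = vocab.tokens_to_ids(["w1", "w3"])  # e.g. [1, 3], since "<unk>" takes index 0
print(vocab.ids_to_tokens(ids))          # round-trips back to ['w1', 'w3']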
def to_bytes(array, encoding='utf8'):
@@ -398,201 +569,25 @@ def to_bytes(array, encoding='utf8'):
return np.char.encode(array, encoding)
class JiebaMode(IntEnum):
def to_str(array, encoding='utf8'):
"""
An enumeration for JiebaTokenizer.
Convert NumPy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.
Possible enumeration values are: JiebaMode.MIX, JiebaMode.MP, JiebaMode.HMM.
Args:
array (numpy.ndarray): Array of `bytes` type representing strings.
encoding (str): Indicating the charset for decoding (default='utf8').
- JiebaMode.MIX: tokenize with a mix of MPSegment and HMMSegment algorithms.
- JiebaMode.MP: tokenize with MPSegment algorithm.
- JiebaMode.HMM: tokenize with Hidden Markov Model Segment algorithm.
Returns:
numpy.ndarray, NumPy array of `str`.
Examples:
>>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"]
>>> dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir, shuffle=False)
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
... data = text.to_str(item["text"])
"""
MIX = 0
MP = 1
HMM = 2
if not isinstance(array, np.ndarray):
raise TypeError('input should be a NumPy array.')
class NormalizeForm(IntEnum):
"""
Enumeration class for `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
Possible enumeration values are: NormalizeForm.NONE, NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD
and NormalizeForm.NFKD.
- NormalizeForm.NONE: no normalization.
- NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
- NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
- NormalizeForm.NFD: Canonical Decomposition.
- NormalizeForm.NFKD: Compatibility Decomposition.
"""
NONE = 0
NFC = 1
NFKC = 2
NFD = 3
NFKD = 4
class SentencePieceModel(IntEnum):
"""
An enumeration for SentencePieceModel.
Possible enumeration values are: SentencePieceModel.UNIGRAM, SentencePieceModel.BPE, SentencePieceModel.CHAR,
SentencePieceModel.WORD.
- SentencePieceModel.UNIGRAM: Unigram Language Model means the next word in the sentence is assumed to be
independent of the previous words generated by the model.
- SentencePieceModel.BPE: refers to the byte pair encoding algorithm, which replaces the most frequent pair of bytes in
a sentence with a single, unused byte.
- SentencePieceModel.CHAR: refers to the character-based SentencePiece model type.
- SentencePieceModel.WORD: refers to the word-based SentencePiece model type.
"""
UNIGRAM = 0
BPE = 1
CHAR = 2
WORD = 3
DE_C_INTER_SENTENCEPIECE_MODE = {
SentencePieceModel.UNIGRAM: cde.SentencePieceModel.DE_SENTENCE_PIECE_UNIGRAM,
SentencePieceModel.BPE: cde.SentencePieceModel.DE_SENTENCE_PIECE_BPE,
SentencePieceModel.CHAR: cde.SentencePieceModel.DE_SENTENCE_PIECE_CHAR,
SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
}
class SPieceTokenizerOutType(IntEnum):
"""
An enumeration for SPieceTokenizerOutType.
Possible enumeration values are: SPieceTokenizerOutType.STRING, SPieceTokenizerOutType.INT.
- SPieceTokenizerOutType.STRING: means the output type of the SentencePiece tokenizer is string.
- SPieceTokenizerOutType.INT: means the output type of the SentencePiece tokenizer is int.
"""
STRING = 0
INT = 1
class SPieceTokenizerLoadType(IntEnum):
"""
An enumeration for loading type of SentencePieceTokenizer.
Possible enumeration values are: SPieceTokenizerLoadType.FILE, SPieceTokenizerLoadType.MODEL.
- SPieceTokenizerLoadType.FILE: Load SentencePiece tokenizer from a Vocab file.
- SPieceTokenizerLoadType.MODEL: Load SentencePiece tokenizer from a SentencePieceVocab object.
"""
FILE = 0
MODEL = 1
class Vectors(cde.Vectors):
"""
Vectors object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a vector from a file.
Args:
file_path (str): Path of the file that contains the vectors.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class FastText(cde.FastText):
"""
FastText object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a FastText vector from a file.
Args:
file_path (str): Path of the file that contains the vectors. The suffix of pre-trained vector sets
must be `*.vec`.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class GloVe(cde.GloVe):
"""
GloVe object that is used to map tokens into vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a GloVe vector from a file.
Args:
file_path (str): Path of the file that contains the vectors. The format of pre-trained vector sets
must be `glove.6B.*.txt`.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
class CharNGram(cde.CharNGram):
"""
CharNGram object that is used to map tokens into pre-trained vectors.
"""
@classmethod
@check_from_file_vectors
def from_file(cls, file_path, max_vectors=None):
"""
Build a CharNGram vector from a file.
Args:
file_path (str): Path of the file that contains the CharNGram vectors.
max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
situations where the entire set doesn't fit in memory, or is not needed for another reason,
passing max_vectors can limit the size of the loaded set (default=None, no limit).
Examples:
>>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
"""
max_vectors = max_vectors if max_vectors is not None else 0
return super().from_file(file_path, max_vectors)
return np.char.decode(array, encoding)

View File

@@ -39,5 +39,5 @@ from .. import vision
from . import c_transforms
from . import py_transforms
from . import transforms
from .transforms import not_random, Relational, Compose, Concatenate, Duplicate, Fill, Mask, OneHot, PadEnd, Plugin, \
RandomApply, RandomChoice, RandomOrder, Slice, TypeCast, Unique
from .transforms import Compose, Concatenate, Duplicate, Fill, Mask, OneHot, PadEnd, Plugin, RandomApply, \
RandomChoice, RandomOrder, Relational, Slice, TypeCast, Unique, not_random
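The re-sorted list keeps Relational next to the transforms that consume it; a minimal eager sketch:

import numpy as np
import mindspore.dataset.transforms as transforms

# Mask compares each element against a constant using a Relational operator.
mask_op = transforms.Mask(transforms.Relational.EQ, 2)
print(mask_op(np.array([1, 2, 3, 2])))  # expected [False  True False  True]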

View File

@@ -459,36 +459,6 @@ class Fill(TensorOperation):
return cde.FillOperation(self.fill_value)
class Relational(IntEnum):
"""
Relationship operator.
Possible enumeration values are: Relational.EQ, Relational.NE, Relational.GT, Relational.GE, Relational.LT,
Relational.LE.
- Relational.EQ: refers to Equality.
- Relational.NE: refers to Not Equal, or Inequality.
- Relational.GT: refers to Greater than.
- Relational.GE: refers to Greater than or equal to.
- Relational.LT: refers to Less than.
- Relational.LE: refers to Less than or equal to.
"""
EQ = 0
NE = 1
GT = 2
GE = 3
LT = 4
LE = 5
DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
Relational.NE: cde.RelationalOp.NE,
Relational.GT: cde.RelationalOp.GT,
Relational.GE: cde.RelationalOp.GE,
Relational.LT: cde.RelationalOp.LT,
Relational.LE: cde.RelationalOp.LE}
class Mask(TensorOperation):
r"""
Mask content of the input tensor with the given predicate.
@@ -808,6 +778,36 @@ class RandomOrder(PyTensorOperation):
return util.random_order(img, self.transforms)
class Relational(IntEnum):
"""
Relationship operator.
Possible enumeration values are: Relational.EQ, Relational.NE, Relational.GT, Relational.GE, Relational.LT,
Relational.LE.
- Relational.EQ: refers to Equality.
- Relational.NE: refers to Not Equal, or Inequality.
- Relational.GT: refers to Greater than.
- Relational.GE: refers to Greater than or equal to.
- Relational.LT: refers to Less than.
- Relational.LE: refers to Less than or equal to.
"""
EQ = 0
NE = 1
GT = 2
GE = 3
LT = 4
LE = 5
DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
Relational.NE: cde.RelationalOp.NE,
Relational.GT: cde.RelationalOp.GT,
Relational.GE: cde.RelationalOp.GE,
Relational.LT: cde.RelationalOp.LT,
Relational.LE: cde.RelationalOp.LE}
class _SliceOption(cde.SliceOption):
"""
Internal class SliceOption to be used with SliceOperation

View File

@@ -39,14 +39,15 @@ Descriptions of common data processing terms are as follows:
from . import c_transforms
from . import py_transforms
from . import transforms
from .transforms import not_random, AdjustGamma, AutoAugment, AutoContrast, BoundingBoxAugment, CenterCrop, \
ConvertColor, Crop, CutMixBatch, CutOut, Decode, Equalize, FiveCrop, GaussianBlur, Grayscale, HorizontalFlip, \
HsvToRgb, HWC2CHW, Invert, LinearTransformation, MixUpBatch, MixUp, NormalizePad, Normalize, Pad, PadToSize, \
RandomAdjustSharpness, RandomAffine, RandomAutoContrast, RandomColorAdjust, RandomColor, RandomCropDecodeResize, \
RandomCrop, RandomCropWithBBox, RandomEqualize, RandomErasing, RandomGrayscale, RandomHorizontalFlip, \
from . import utils
from .transforms import AdjustGamma, AutoAugment, AutoContrast, BoundingBoxAugment, CenterCrop, ConvertColor, Crop, \
CutMixBatch, CutOut, Decode, Equalize, FiveCrop, GaussianBlur, Grayscale, HorizontalFlip, HsvToRgb, HWC2CHW, \
Invert, LinearTransformation, MixUp, MixUpBatch, Normalize, NormalizePad, Pad, PadToSize, RandomAdjustSharpness, \
RandomAffine, RandomAutoContrast, RandomColor, RandomColorAdjust, RandomCrop, RandomCropDecodeResize, \
RandomCropWithBBox, RandomEqualize, RandomErasing, RandomGrayscale, RandomHorizontalFlip, \
RandomHorizontalFlipWithBBox, RandomInvert, RandomLighting, RandomPerspective, RandomPosterize, RandomResizedCrop, \
RandomResizedCropWithBBox, RandomResize, RandomResizeWithBBox, RandomRotation, RandomSelectSubpolicy, \
RandomSharpness, RandomSolarize, RandomVerticalFlip, RandomVerticalFlipWithBBox, Rescale, Resize, ResizeWithBBox, \
RgbToHsv, Rotate, SlicePatches, TenCrop, ToNumpy, ToPIL, ToTensor, ToType, UniformAugment, VerticalFlip
from .utils import Inter, Border, ConvertMode, ImageBatchFormat, SliceMode, AutoAugmentPolicy, get_image_num_channels, \
RgbToHsv, Rotate, SlicePatches, TenCrop, ToNumpy, ToPIL, ToTensor, ToType, UniformAugment, VerticalFlip, not_random
from .utils import AutoAugmentPolicy, Border, ConvertMode, ImageBatchFormat, Inter, SliceMode, get_image_num_channels, \
get_image_size
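With utils re-sorted above, interpolation enums pair directly with the transforms on the flat path; a small sketch:

import mindspore.dataset.vision as vision

# Inter now resolves from the flat path alongside the transform that uses it.
resize = vision.Resize((224, 224), interpolation=vision.Inter.BILINEAR)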

View File

@@ -24,227 +24,6 @@ from mindspore import log as logger
import mindspore._c_dataengine as cde
class Inter(IntEnum):
"""
Interpolation Modes.
Possible enumeration values are: Inter.NEAREST, Inter.ANTIALIAS, Inter.LINEAR, Inter.BILINEAR, Inter.CUBIC,
Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC.
- Inter.NEAREST: means interpolation method is nearest-neighbor interpolation.
- Inter.ANTIALIAS: means the interpolation method is antialias interpolation.
- Inter.LINEAR: means interpolation method is bilinear interpolation, here is the same as Inter.BILINEAR.
- Inter.BILINEAR: means interpolation method is bilinear interpolation.
- Inter.CUBIC: means the interpolation method is bicubic interpolation, here is the same as Inter.BICUBIC.
- Inter.BICUBIC: means the interpolation method is bicubic interpolation.
- Inter.AREA: means interpolation method is pixel area interpolation.
- Inter.PILCUBIC: means the interpolation method is bicubic interpolation as implemented in Pillow; the input
should be in 3-channel format.
"""
NEAREST = 0
ANTIALIAS = 1
BILINEAR = LINEAR = 2
BICUBIC = CUBIC = 3
AREA = 4
PILCUBIC = 5
@staticmethod
def to_python_type(inter_type):
"""
Function to return Python type for Interpolation Mode.
"""
if Image.__version__ >= "9.1.0":
python_values = {Inter.NEAREST: Image.Resampling.NEAREST,
Inter.ANTIALIAS: Image.Resampling.LANCZOS,
Inter.LINEAR: Image.Resampling.BILINEAR,
Inter.CUBIC: Image.Resampling.BICUBIC}
else:
python_values = {Inter.NEAREST: Image.NEAREST,
Inter.ANTIALIAS: Image.ANTIALIAS,
Inter.LINEAR: Image.LINEAR,
Inter.CUBIC: Image.CUBIC}
return python_values.get(inter_type)
@staticmethod
def to_c_type(inter_type):
"""
Function to return C type for Interpolation Mode.
"""
c_values = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC}
return c_values.get(inter_type)
class Border(str, Enum):
"""
Padding Mode, Border Type.
Possible enumeration values are: Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC.
- Border.CONSTANT: means it fills the border with constant values.
- Border.EDGE: means it pads with the last value on the edge.
- Border.REFLECT: means it reflects the values on the edge omitting the last value of edge.
- Border.SYMMETRIC: means it reflects the values on the edge repeating the last value of edge.
Note: This class is derived from class str to support JSON serialization.
"""
CONSTANT: str = "constant"
EDGE: str = "edge"
REFLECT: str = "reflect"
SYMMETRIC: str = "symmetric"
@staticmethod
def to_python_type(border_type):
"""
Function to return Python type for Border Type.
"""
python_values = {Border.CONSTANT: 'constant',
Border.EDGE: 'edge',
Border.REFLECT: 'reflect',
Border.SYMMETRIC: 'symmetric'}
return python_values.get(border_type)
@staticmethod
def to_c_type(border_type):
"""
Function to return C type for Border Type.
"""
c_values = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}
return c_values.get(border_type)
class ImageBatchFormat(IntEnum):
"""
Data Format of images after batch operation.
Possible enumeration values are: ImageBatchFormat.NHWC, ImageBatchFormat.NCHW.
- ImageBatchFormat.NHWC: stores the data in order of batch N, height H, width W, channels C.
- ImageBatchFormat.NCHW: stores the data in order of batch N, channels C, height H, width W.
"""
NHWC = 0
NCHW = 1
@staticmethod
def to_c_type(image_batch_format):
"""
Function to return C type for ImageBatchFormat.
"""
c_values = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}
return c_values.get(image_batch_format)
class ConvertMode(IntEnum):
"""
The color conversion mode.
Possible enumeration values are as follows:
- ConvertMode.COLOR_BGR2BGRA: convert BGR format images to BGRA format images.
- ConvertMode.COLOR_RGB2RGBA: convert RGB format images to RGBA format images.
- ConvertMode.COLOR_BGRA2BGR: convert BGRA format images to BGR format images.
- ConvertMode.COLOR_RGBA2RGB: convert RGBA format images to RGB format images.
- ConvertMode.COLOR_BGR2RGBA: convert BGR format images to RGBA format images.
- ConvertMode.COLOR_RGB2BGRA: convert RGB format images to BGRA format images.
- ConvertMode.COLOR_RGBA2BGR: convert RGBA format images to BGR format images.
- ConvertMode.COLOR_BGRA2RGB: convert BGRA format images to RGB format images.
- ConvertMode.COLOR_BGR2RGB: convert BGR format images to RGB format images.
- ConvertMode.COLOR_RGB2BGR: convert RGB format images to BGR format images.
- ConvertMode.COLOR_BGRA2RGBA: convert BGRA format images to RGBA format images.
- ConvertMode.COLOR_RGBA2BGRA: convert RGBA format images to BGRA format images.
- ConvertMode.COLOR_BGR2GRAY: convert BGR format images to GRAY format images.
- ConvertMode.COLOR_RGB2GRAY: convert RGB format images to GRAY format images.
- ConvertMode.COLOR_GRAY2BGR: convert GRAY format images to BGR format images.
- ConvertMode.COLOR_GRAY2RGB: convert GRAY format images to RGB format images.
- ConvertMode.COLOR_GRAY2BGRA: convert GRAY format images to BGRA format images.
- ConvertMode.COLOR_GRAY2RGBA: convert GRAY format images to RGBA format images.
- ConvertMode.COLOR_BGRA2GRAY: convert BGRA format images to GRAY format images.
- ConvertMode.COLOR_RGBA2GRAY: convert RGBA format images to GRAY format images.
"""
COLOR_BGR2BGRA = 0
COLOR_RGB2RGBA = COLOR_BGR2BGRA
COLOR_BGRA2BGR = 1
COLOR_RGBA2RGB = COLOR_BGRA2BGR
COLOR_BGR2RGBA = 2
COLOR_RGB2BGRA = COLOR_BGR2RGBA
COLOR_RGBA2BGR = 3
COLOR_BGRA2RGB = COLOR_RGBA2BGR
COLOR_BGR2RGB = 4
COLOR_RGB2BGR = COLOR_BGR2RGB
COLOR_BGRA2RGBA = 5
COLOR_RGBA2BGRA = COLOR_BGRA2RGBA
COLOR_BGR2GRAY = 6
COLOR_RGB2GRAY = 7
COLOR_GRAY2BGR = 8
COLOR_GRAY2RGB = COLOR_GRAY2BGR
COLOR_GRAY2BGRA = 9
COLOR_GRAY2RGBA = COLOR_GRAY2BGRA
COLOR_BGRA2GRAY = 10
COLOR_RGBA2GRAY = 11
@staticmethod
def to_c_type(mode):
"""
Function to return C type for color mode.
"""
c_values = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
}
return c_values.get(mode)
class SliceMode(IntEnum):
"""
Mode to Slice Tensor into multiple parts.
Possible enumeration values are: SliceMode.PAD, SliceMode.DROP.
- SliceMode.PAD: pad some pixels before slice the Tensor if needed.
- SliceMode.DROP: drop remainder pixels before slice the Tensor if needed.
"""
PAD = 0
DROP = 1
@staticmethod
def to_c_type(mode):
"""
Function to return C type for SliceMode.
"""
c_values = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP}
return c_values.get(mode)
class AutoAugmentPolicy(str, Enum):
"""
AutoAugment policy for different datasets.
@@ -331,22 +110,225 @@ class AutoAugmentPolicy(str, Enum):
return c_values.get(policy)
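# Usage sketch (an assumption, not part of this diff): AutoAugmentPolicy values are
# consumed by an AutoAugment transform. The transform name and the IMAGENET member are
# assumed here; verify them against the installed mindspore.dataset.vision API and the
# elided class body above.
import mindspore.dataset.vision as vision
auto_augment_op = vision.AutoAugment(policy=AutoAugmentPolicy.IMAGENET)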
class Border(str, Enum):
"""
Padding Mode, Border Type.
Possible enumeration values are: Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC.
- Border.CONSTANT: fills the border with a constant value.
- Border.EDGE: pads with the last value on the edge.
- Border.REFLECT: reflects the values on the edge, omitting the last value of the edge.
- Border.SYMMETRIC: reflects the values on the edge, repeating the last value of the edge.
Note: This class is derived from str to support JSON serialization.
"""
CONSTANT: str = "constant"
EDGE: str = "edge"
REFLECT: str = "reflect"
SYMMETRIC: str = "symmetric"
@staticmethod
def to_python_type(border_type):
"""
Function to return Python type for Border Type.
"""
python_values = {Border.CONSTANT: 'constant',
Border.EDGE: 'edge',
Border.REFLECT: 'reflect',
Border.SYMMETRIC: 'symmetric'}
return python_values.get(border_type)
@staticmethod
def to_c_type(border_type):
"""
Function to return C type for Border Type.
"""
c_values = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}
return c_values.get(border_type)
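# Usage sketch (assumed API, verify against the installed package): Border values are
# typically passed to padding-style transforms through the `padding_mode` parameter.
import mindspore.dataset.vision as vision
pad_op = vision.Pad(padding=4, fill_value=0, padding_mode=Border.CONSTANT)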
class ConvertMode(IntEnum):
"""
The color conversion mode.
Possible enumeration values are as follows:
- ConvertMode.COLOR_BGR2BGRA: convert BGR format images to BGRA format images.
- ConvertMode.COLOR_RGB2RGBA: convert RGB format images to RGBA format images.
- ConvertMode.COLOR_BGRA2BGR: convert BGRA format images to BGR format images.
- ConvertMode.COLOR_RGBA2RGB: convert RGBA format images to RGB format images.
- ConvertMode.COLOR_BGR2RGBA: convert BGR format images to RGBA format images.
- ConvertMode.COLOR_RGB2BGRA: convert RGB format images to BGRA format images.
- ConvertMode.COLOR_RGBA2BGR: convert RGBA format images to BGR format images.
- ConvertMode.COLOR_BGRA2RGB: convert BGRA format images to RGB format images.
- ConvertMode.COLOR_BGR2RGB: convert BGR format images to RGB format images.
- ConvertMode.COLOR_RGB2BGR: convert RGB format images to BGR format images.
- ConvertMode.COLOR_BGRA2RGBA: convert BGRA format images to RGBA format images.
- ConvertMode.COLOR_RGBA2BGRA: convert RGBA format images to BGRA format images.
- ConvertMode.COLOR_BGR2GRAY: convert BGR format images to GRAY format images.
- ConvertMode.COLOR_RGB2GRAY: convert RGB format images to GRAY format images.
- ConvertMode.COLOR_GRAY2BGR: convert GRAY format images to BGR format images.
- ConvertMode.COLOR_GRAY2RGB: convert GRAY format images to RGB format images.
- ConvertMode.COLOR_GRAY2BGRA: convert GRAY format images to BGRA format images.
- ConvertMode.COLOR_GRAY2RGBA: convert GRAY format images to RGBA format images.
- ConvertMode.COLOR_BGRA2GRAY: convert BGRA format images to GRAY format images.
- ConvertMode.COLOR_RGBA2GRAY: convert RGBA format images to GRAY format images.
"""
COLOR_BGR2BGRA = 0
COLOR_RGB2RGBA = COLOR_BGR2BGRA
COLOR_BGRA2BGR = 1
COLOR_RGBA2RGB = COLOR_BGRA2BGR
COLOR_BGR2RGBA = 2
COLOR_RGB2BGRA = COLOR_BGR2RGBA
COLOR_RGBA2BGR = 3
COLOR_BGRA2RGB = COLOR_RGBA2BGR
COLOR_BGR2RGB = 4
COLOR_RGB2BGR = COLOR_BGR2RGB
COLOR_BGRA2RGBA = 5
COLOR_RGBA2BGRA = COLOR_BGRA2RGBA
COLOR_BGR2GRAY = 6
COLOR_RGB2GRAY = 7
COLOR_GRAY2BGR = 8
COLOR_GRAY2RGB = COLOR_GRAY2BGR
COLOR_GRAY2BGRA = 9
COLOR_GRAY2RGBA = COLOR_GRAY2BGRA
COLOR_BGRA2GRAY = 10
COLOR_RGBA2GRAY = 11
@staticmethod
def to_c_type(mode):
"""
Function to return C type for color mode.
"""
c_values = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
}
return c_values.get(mode)
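# Executable note on the alias assignments above: conversions that differ only in the
# naming of the unchanged channels share one enum value (mirroring OpenCV-style
# numbering), e.g. appending an alpha channel is one operation for RGB and BGR data.
assert ConvertMode.COLOR_RGB2RGBA is ConvertMode.COLOR_BGR2BGRA  # both value 0
assert ConvertMode.COLOR_GRAY2RGB is ConvertMode.COLOR_GRAY2BGR  # both value 8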
class ImageBatchFormat(IntEnum):
"""
Data format of images after the batch operation.
Possible enumeration values are: ImageBatchFormat.NHWC, ImageBatchFormat.NCHW.
- ImageBatchFormat.NHWC: store data in the order of batch N, height H, width W, channels C.
- ImageBatchFormat.NCHW: store data in the order of batch N, channels C, height H, width W.
"""
NHWC = 0
NCHW = 1
@staticmethod
def to_c_type(image_batch_format):
"""
Function to return C type for ImageBatchFormat.
"""
c_values = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}
return c_values.get(image_batch_format)
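# Layout illustration with NumPy (not part of this diff): the same batch in the two
# supported formats differs only by a transpose.
import numpy as np
batch_nhwc = np.zeros((8, 224, 224, 3), dtype=np.uint8)  # batch, height, width, channels
batch_nchw = batch_nhwc.transpose(0, 3, 1, 2)            # batch, channels, height, width
assert batch_nchw.shape == (8, 3, 224, 224)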
class Inter(IntEnum):
"""
Interpolation modes.
Possible enumeration values are: Inter.NEAREST, Inter.ANTIALIAS, Inter.LINEAR, Inter.BILINEAR, Inter.CUBIC,
Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC.
- Inter.NEAREST: nearest-neighbor interpolation.
- Inter.ANTIALIAS: antialias interpolation.
- Inter.LINEAR: bilinear interpolation, an alias of Inter.BILINEAR.
- Inter.BILINEAR: bilinear interpolation.
- Inter.CUBIC: bicubic interpolation, an alias of Inter.BICUBIC.
- Inter.BICUBIC: bicubic interpolation.
- Inter.AREA: pixel area interpolation.
- Inter.PILCUBIC: bicubic interpolation as implemented in Pillow; the input should have 3 channels.
"""
NEAREST = 0
ANTIALIAS = 1
BILINEAR = LINEAR = 2
BICUBIC = CUBIC = 3
AREA = 4
PILCUBIC = 5
@staticmethod
def to_python_type(inter_type):
"""
Function to return Python type for Interpolation Mode.
"""
# Compare versions numerically; a plain string comparison would sort "10.x" before "9.x".
if tuple(int(v) for v in Image.__version__.split(".")[:2]) >= (9, 1):
python_values = {Inter.NEAREST: Image.Resampling.NEAREST,
Inter.ANTIALIAS: Image.Resampling.LANCZOS,
Inter.LINEAR: Image.Resampling.BILINEAR,
Inter.CUBIC: Image.Resampling.BICUBIC}
else:
python_values = {Inter.NEAREST: Image.NEAREST,
Inter.ANTIALIAS: Image.ANTIALIAS,
Inter.LINEAR: Image.LINEAR,
Inter.CUBIC: Image.CUBIC}
return python_values.get(inter_type)
@staticmethod
def to_c_type(inter_type):
"""
Function to return C type for Interpolation Mode.
"""
c_values = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC}
return c_values.get(inter_type)
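# Executable coverage note (not part of this diff): LINEAR/BILINEAR and CUBIC/BICUBIC
# are enum aliases, and the two mapping tables above cover different subsets -- the
# Pillow table omits AREA and PILCUBIC, the C table omits ANTIALIAS -- so .get()
# returns None for the uncovered modes.
assert Inter.LINEAR is Inter.BILINEAR and Inter.CUBIC is Inter.BICUBIC
assert Inter.to_python_type(Inter.AREA) is None
assert Inter.to_c_type(Inter.ANTIALIAS) is None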
class SliceMode(IntEnum):
"""
Mode to slice a tensor into multiple parts.
Possible enumeration values are: SliceMode.PAD, SliceMode.DROP.
- SliceMode.PAD: pad the tensor before slicing, if needed.
- SliceMode.DROP: drop the remainder pixels before slicing, if needed.
"""
PAD = 0
DROP = 1
@staticmethod
def to_c_type(mode):
"""
Function to return C type for SliceMode.
"""
c_values = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP}
return c_values.get(mode)
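# Semantics sketch in NumPy (not part of this diff): slicing a length-10 row into
# chunks of 4 under each mode; the real transform applies the same idea to 2-D images.
import numpy as np
row = np.arange(10)
chunk = 4
n_all = -(-row.size // chunk)                                                  # ceiling division
pad_chunks = np.pad(row, (0, n_all * chunk - row.size)).reshape(n_all, chunk)  # PAD
drop_chunks = row[:row.size // chunk * chunk].reshape(-1, chunk)               # DROP
assert pad_chunks.shape == (3, 4) and drop_chunks.shape == (2, 4)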
def get_image_num_channels(image):
@@ -397,3 +379,21 @@ def get_image_size(image):
return size_list
raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image, type(image)))
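# The bodies of get_image_num_channels and get_image_size are elided by the fold above.
# A minimal sketch of the expected behavior for NumPy inputs, inferred from the visible
# signatures and the TypeError message (an assumption, not the implementation):
import numpy as np
img = np.zeros((480, 640, 3), dtype=np.uint8)           # H, W, C layout
size_hw = [img.shape[0], img.shape[1]]                  # what get_image_size would report
num_channels = img.shape[-1] if img.ndim == 3 else 1    # what get_image_num_channels would report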
def parse_padding(padding):
""" Parses and prepares the padding tuple"""
if isinstance(padding, numbers.Number):
padding = [padding] * 4
if len(padding) == 2:
logger.warning("The behavior when `padding` is a sequence of length 2 will change from padding left/top "
"with the first value and right/bottom with the second, to padding left/right with the "
"first one and top/bottom with the second in the future. Or you can pass in a 4-element "
"sequence to specify left, top, right and bottom respectively.")
left = top = padding[0]
right = bottom = padding[1]
padding = (left, top, right, bottom,)
if isinstance(padding, list):
padding = tuple(padding)
return padding
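# Resulting tuples for the three accepted input shapes (the 2-element case also logs
# the behavior-change warning above):
assert parse_padding(2) == (2, 2, 2, 2)
assert parse_padding([1, 3]) == (1, 1, 3, 3)        # (left, top, right, bottom)
assert parse_padding((1, 2, 3, 4)) == (1, 2, 3, 4)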