!35949 Add new APIs and fix doc problems
Merge pull request !35949 from xiaotianci/add_api
commit 5cf4a18d06
@@ -3,30 +3,75 @@ mindspore.dataset.audio
 .. include:: dataset_audio/mindspore.dataset.audio.rst
 
-mindspore.dataset.audio.transforms
-----------------------------------
+Transforms
+----------
 
 .. mscnautosummary::
     :toctree: dataset_audio
 
-    mindspore.dataset.audio.transforms.AllpassBiquad
-    mindspore.dataset.audio.transforms.AmplitudeToDB
-    mindspore.dataset.audio.transforms.Angle
-    mindspore.dataset.audio.transforms.BandBiquad
-    mindspore.dataset.audio.transforms.BandpassBiquad
-    mindspore.dataset.audio.transforms.BandrejectBiquad
-    mindspore.dataset.audio.transforms.BassBiquad
-    mindspore.dataset.audio.transforms.ComplexNorm
-    mindspore.dataset.audio.transforms.Contrast
-    mindspore.dataset.audio.transforms.FrequencyMasking
-    mindspore.dataset.audio.transforms.LowpassBiquad
-    mindspore.dataset.audio.transforms.TimeMasking
-    mindspore.dataset.audio.transforms.TimeStretch
+    mindspore.dataset.audio.AllpassBiquad
+    mindspore.dataset.audio.AmplitudeToDB
+    mindspore.dataset.audio.Angle
+    mindspore.dataset.audio.BandBiquad
+    mindspore.dataset.audio.BandpassBiquad
+    mindspore.dataset.audio.BandrejectBiquad
+    mindspore.dataset.audio.BassBiquad
+    mindspore.dataset.audio.Biquad
+    mindspore.dataset.audio.ComplexNorm
+    mindspore.dataset.audio.ComputeDeltas
+    mindspore.dataset.audio.Contrast
+    mindspore.dataset.audio.DBToAmplitude
+    mindspore.dataset.audio.DCShift
+    mindspore.dataset.audio.DeemphBiquad
+    mindspore.dataset.audio.DetectPitchFrequency
+    mindspore.dataset.audio.Dither
+    mindspore.dataset.audio.EqualizerBiquad
+    mindspore.dataset.audio.Fade
+    mindspore.dataset.audio.Flanger
+    mindspore.dataset.audio.FrequencyMasking
+    mindspore.dataset.audio.Gain
+    mindspore.dataset.audio.GriffinLim
+    mindspore.dataset.audio.HighpassBiquad
+    mindspore.dataset.audio.InverseMelScale
+    mindspore.dataset.audio.LFilter
+    mindspore.dataset.audio.LowpassBiquad
+    mindspore.dataset.audio.Magphase
+    mindspore.dataset.audio.MaskAlongAxis
+    mindspore.dataset.audio.MaskAlongAxisIID
+    mindspore.dataset.audio.MelScale
+    mindspore.dataset.audio.MuLawDecoding
+    mindspore.dataset.audio.MuLawEncoding
+    mindspore.dataset.audio.Overdrive
+    mindspore.dataset.audio.Phaser
+    mindspore.dataset.audio.PhaseVocoder
+    mindspore.dataset.audio.Resample
+    mindspore.dataset.audio.RiaaBiquad
+    mindspore.dataset.audio.SlidingWindowCmn
+    mindspore.dataset.audio.SpectralCentroid
+    mindspore.dataset.audio.Spectrogram
+    mindspore.dataset.audio.TimeMasking
+    mindspore.dataset.audio.TimeStretch
+    mindspore.dataset.audio.TrebleBiquad
+    mindspore.dataset.audio.Vad
+    mindspore.dataset.audio.Vol
 
-mindspore.dataset.audio.utils
------------------------------
+Utilities
+---------
 
 .. mscnautosummary::
     :toctree: dataset_audio
 
-    mindspore.dataset.audio.utils.ScaleType
+    mindspore.dataset.audio.BorderType
+    mindspore.dataset.audio.DensityFunction
+    mindspore.dataset.audio.FadeShape
+    mindspore.dataset.audio.GainType
+    mindspore.dataset.audio.Interpolation
+    mindspore.dataset.audio.MelType
+    mindspore.dataset.audio.Modulation
+    mindspore.dataset.audio.NormMode
+    mindspore.dataset.audio.NormType
+    mindspore.dataset.audio.ResampleMethod
+    mindspore.dataset.audio.ScaleType
+    mindspore.dataset.audio.WindowType
+    mindspore.dataset.audio.create_dct
+    mindspore.dataset.audio.melscale_fbanks
@@ -19,48 +19,54 @@ mindspore.dataset.text
 - TensorOperation, the base class of all data processing operations implemented in C++.
 - TextTensorOperation, the base class of all text processing operations, derived from TensorOperation.
 
-mindspore.dataset.text.transforms
----------------------------------
+Transforms
+----------
 
 .. mscnnoteautosummary::
     :toctree: dataset_text
     :nosignatures:
     :template: classtemplate.rst
 
-    mindspore.dataset.text.transforms.BasicTokenizer
-    mindspore.dataset.text.transforms.BertTokenizer
-    mindspore.dataset.text.transforms.CaseFold
-    mindspore.dataset.text.transforms.JiebaTokenizer
-    mindspore.dataset.text.transforms.Lookup
-    mindspore.dataset.text.transforms.Ngram
-    mindspore.dataset.text.transforms.NormalizeUTF8
-    mindspore.dataset.text.transforms.PythonTokenizer
-    mindspore.dataset.text.transforms.RegexReplace
-    mindspore.dataset.text.transforms.RegexTokenizer
-    mindspore.dataset.text.transforms.SentencePieceTokenizer
-    mindspore.dataset.text.transforms.SlidingWindow
-    mindspore.dataset.text.transforms.ToNumber
-    mindspore.dataset.text.transforms.TruncateSequencePair
-    mindspore.dataset.text.transforms.UnicodeCharTokenizer
-    mindspore.dataset.text.transforms.UnicodeScriptTokenizer
-    mindspore.dataset.text.transforms.WhitespaceTokenizer
-    mindspore.dataset.text.transforms.WordpieceTokenizer
+    mindspore.dataset.text.BasicTokenizer
+    mindspore.dataset.text.BertTokenizer
+    mindspore.dataset.text.CaseFold
+    mindspore.dataset.text.FilterWikipediaXML
+    mindspore.dataset.text.JiebaTokenizer
+    mindspore.dataset.text.Lookup
+    mindspore.dataset.text.Ngram
+    mindspore.dataset.text.NormalizeUTF8
+    mindspore.dataset.text.PythonTokenizer
+    mindspore.dataset.text.RegexReplace
+    mindspore.dataset.text.RegexTokenizer
+    mindspore.dataset.text.SentencePieceTokenizer
+    mindspore.dataset.text.SlidingWindow
+    mindspore.dataset.text.ToNumber
+    mindspore.dataset.text.ToVectors
+    mindspore.dataset.text.TruncateSequencePair
+    mindspore.dataset.text.UnicodeCharTokenizer
+    mindspore.dataset.text.UnicodeScriptTokenizer
+    mindspore.dataset.text.WhitespaceTokenizer
+    mindspore.dataset.text.WordpieceTokenizer
 
-mindspore.dataset.text.utils
-----------------------------
+Utilities
+---------
 
 .. mscnnoteautosummary::
     :toctree: dataset_text
     :nosignatures:
     :template: classtemplate.rst
 
     mindspore.dataset.text.CharNGram
     mindspore.dataset.text.FastText
     mindspore.dataset.text.GloVe
     mindspore.dataset.text.JiebaMode
     mindspore.dataset.text.NormalizeForm
     mindspore.dataset.text.SentencePieceModel
     mindspore.dataset.text.SentencePieceVocab
     mindspore.dataset.text.SPieceTokenizerLoadType
     mindspore.dataset.text.SPieceTokenizerOutType
-    mindspore.dataset.text.to_str
-    mindspore.dataset.text.to_bytes
     mindspore.dataset.text.Vectors
     mindspore.dataset.text.Vocab
+    mindspore.dataset.text.to_bytes
+    mindspore.dataset.text.to_str
@@ -44,8 +44,8 @@ Transforms
     mindspore.dataset.transforms.TypeCast
     mindspore.dataset.transforms.Unique
 
-Others
-------
+Utilities
+---------
 
 .. mscnautosummary::
     :toctree: dataset_transforms
@@ -98,8 +98,8 @@ Transforms
     mindspore.dataset.vision.UniformAugment
     mindspore.dataset.vision.VerticalFlip
 
-Others
-------
+Utilities
+---------
 
 .. mscnautosummary::
     :toctree: dataset_vision
@@ -112,3 +112,5 @@ Others
     mindspore.dataset.vision.ImageBatchFormat
     mindspore.dataset.vision.Inter
     mindspore.dataset.vision.SliceMode
+    mindspore.dataset.vision.get_image_num_channels
+    mindspore.dataset.vision.get_image_size
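A minimal sketch of the two utility functions added in this hunk, assuming they accept a NumPy image in height-width-channel layout (the image array here is synthetic):

```python
import numpy as np
import mindspore.dataset.vision as vision

# A fake 64x32 RGB image in HWC layout.
img = np.random.randint(0, 255, (64, 32, 3), dtype=np.uint8)

print(vision.get_image_num_channels(img))  # expected: 3
print(vision.get_image_size(img))          # expected: [64, 32], i.e. (height, width)
```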
@@ -3,35 +3,80 @@ mindspore.dataset.audio
 
 .. automodule:: mindspore.dataset.audio
 
-mindspore.dataset.audio.transforms
-----------------------------------
+Transforms
+----------
 
 .. autosummary::
     :toctree: dataset_audio
     :nosignatures:
     :template: classtemplate.rst
 
-    mindspore.dataset.audio.transforms.AllpassBiquad
-    mindspore.dataset.audio.transforms.AmplitudeToDB
-    mindspore.dataset.audio.transforms.Angle
-    mindspore.dataset.audio.transforms.BandBiquad
-    mindspore.dataset.audio.transforms.BandpassBiquad
-    mindspore.dataset.audio.transforms.BandrejectBiquad
-    mindspore.dataset.audio.transforms.BassBiquad
-    mindspore.dataset.audio.transforms.ComplexNorm
-    mindspore.dataset.audio.transforms.Contrast
-    mindspore.dataset.audio.transforms.FrequencyMasking
-    mindspore.dataset.audio.transforms.LowpassBiquad
-    mindspore.dataset.audio.transforms.TimeMasking
-    mindspore.dataset.audio.transforms.TimeStretch
+    mindspore.dataset.audio.AllpassBiquad
+    mindspore.dataset.audio.AmplitudeToDB
+    mindspore.dataset.audio.Angle
+    mindspore.dataset.audio.BandBiquad
+    mindspore.dataset.audio.BandpassBiquad
+    mindspore.dataset.audio.BandrejectBiquad
+    mindspore.dataset.audio.BassBiquad
+    mindspore.dataset.audio.Biquad
+    mindspore.dataset.audio.ComplexNorm
+    mindspore.dataset.audio.ComputeDeltas
+    mindspore.dataset.audio.Contrast
+    mindspore.dataset.audio.DBToAmplitude
+    mindspore.dataset.audio.DCShift
+    mindspore.dataset.audio.DeemphBiquad
+    mindspore.dataset.audio.DetectPitchFrequency
+    mindspore.dataset.audio.Dither
+    mindspore.dataset.audio.EqualizerBiquad
+    mindspore.dataset.audio.Fade
+    mindspore.dataset.audio.Flanger
+    mindspore.dataset.audio.FrequencyMasking
+    mindspore.dataset.audio.Gain
+    mindspore.dataset.audio.GriffinLim
+    mindspore.dataset.audio.HighpassBiquad
+    mindspore.dataset.audio.InverseMelScale
+    mindspore.dataset.audio.LFilter
+    mindspore.dataset.audio.LowpassBiquad
+    mindspore.dataset.audio.Magphase
+    mindspore.dataset.audio.MaskAlongAxis
+    mindspore.dataset.audio.MaskAlongAxisIID
+    mindspore.dataset.audio.MelScale
+    mindspore.dataset.audio.MuLawDecoding
+    mindspore.dataset.audio.MuLawEncoding
+    mindspore.dataset.audio.Overdrive
+    mindspore.dataset.audio.Phaser
+    mindspore.dataset.audio.PhaseVocoder
+    mindspore.dataset.audio.Resample
+    mindspore.dataset.audio.RiaaBiquad
+    mindspore.dataset.audio.SlidingWindowCmn
+    mindspore.dataset.audio.SpectralCentroid
+    mindspore.dataset.audio.Spectrogram
+    mindspore.dataset.audio.TimeMasking
+    mindspore.dataset.audio.TimeStretch
+    mindspore.dataset.audio.TrebleBiquad
+    mindspore.dataset.audio.Vad
+    mindspore.dataset.audio.Vol
 
-mindspore.dataset.audio.utils
------------------------------
+Utilities
+---------
 
 .. autosummary::
     :toctree: dataset_audio
     :nosignatures:
     :template: classtemplate.rst
 
-    mindspore.dataset.audio.utils.ScaleType
+    mindspore.dataset.audio.BorderType
+    mindspore.dataset.audio.DensityFunction
+    mindspore.dataset.audio.FadeShape
+    mindspore.dataset.audio.GainType
+    mindspore.dataset.audio.Interpolation
+    mindspore.dataset.audio.MelType
+    mindspore.dataset.audio.Modulation
+    mindspore.dataset.audio.NormMode
+    mindspore.dataset.audio.NormType
+    mindspore.dataset.audio.ResampleMethod
+    mindspore.dataset.audio.ScaleType
+    mindspore.dataset.audio.WindowType
+    mindspore.dataset.audio.create_dct
+    mindspore.dataset.audio.melscale_fbanks
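The entries above now live directly under `mindspore.dataset.audio`. A hedged sketch of the flat import style, applying `AllpassBiquad` eagerly to a synthetic waveform (the signature is assumed to be `AllpassBiquad(sample_rate, central_freq, Q=0.707)`, and eager invocation on a NumPy array is an assumption):

```python
import numpy as np
import mindspore.dataset.audio as audio

# One second of fake mono audio at 44.1 kHz.
waveform = np.random.uniform(-1.0, 1.0, 44100).astype(np.float32)

# Dataset ops are plain callables, so they can also be applied outside a pipeline.
allpass = audio.AllpassBiquad(sample_rate=44100, central_freq=200.0)
filtered = allpass(waveform)
print(filtered.shape)  # (44100,)
```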
@@ -3,48 +3,54 @@ mindspore.dataset.text
 
 .. automodule:: mindspore.dataset.text
 
-mindspore.dataset.text.transforms
----------------------------------
+Transforms
+----------
 
 .. msnoteautosummary::
     :toctree: dataset_text
     :nosignatures:
     :template: classtemplate.rst
 
-    mindspore.dataset.text.transforms.BasicTokenizer
-    mindspore.dataset.text.transforms.BertTokenizer
-    mindspore.dataset.text.transforms.CaseFold
-    mindspore.dataset.text.transforms.JiebaTokenizer
-    mindspore.dataset.text.transforms.Lookup
-    mindspore.dataset.text.transforms.Ngram
-    mindspore.dataset.text.transforms.NormalizeUTF8
-    mindspore.dataset.text.transforms.PythonTokenizer
-    mindspore.dataset.text.transforms.RegexReplace
-    mindspore.dataset.text.transforms.RegexTokenizer
-    mindspore.dataset.text.transforms.SentencePieceTokenizer
-    mindspore.dataset.text.transforms.SlidingWindow
-    mindspore.dataset.text.transforms.ToNumber
-    mindspore.dataset.text.transforms.TruncateSequencePair
-    mindspore.dataset.text.transforms.UnicodeCharTokenizer
-    mindspore.dataset.text.transforms.UnicodeScriptTokenizer
-    mindspore.dataset.text.transforms.WhitespaceTokenizer
-    mindspore.dataset.text.transforms.WordpieceTokenizer
+    mindspore.dataset.text.BasicTokenizer
+    mindspore.dataset.text.BertTokenizer
+    mindspore.dataset.text.CaseFold
+    mindspore.dataset.text.FilterWikipediaXML
+    mindspore.dataset.text.JiebaTokenizer
+    mindspore.dataset.text.Lookup
+    mindspore.dataset.text.Ngram
+    mindspore.dataset.text.NormalizeUTF8
+    mindspore.dataset.text.PythonTokenizer
+    mindspore.dataset.text.RegexReplace
+    mindspore.dataset.text.RegexTokenizer
+    mindspore.dataset.text.SentencePieceTokenizer
+    mindspore.dataset.text.SlidingWindow
+    mindspore.dataset.text.ToNumber
+    mindspore.dataset.text.ToVectors
+    mindspore.dataset.text.TruncateSequencePair
+    mindspore.dataset.text.UnicodeCharTokenizer
+    mindspore.dataset.text.UnicodeScriptTokenizer
+    mindspore.dataset.text.WhitespaceTokenizer
+    mindspore.dataset.text.WordpieceTokenizer
 
-mindspore.dataset.text.utils
-----------------------------
+Utilities
+---------
 
 .. msnoteautosummary::
     :toctree: dataset_text
     :nosignatures:
     :template: classtemplate.rst
 
     mindspore.dataset.text.CharNGram
     mindspore.dataset.text.FastText
     mindspore.dataset.text.GloVe
     mindspore.dataset.text.JiebaMode
     mindspore.dataset.text.NormalizeForm
     mindspore.dataset.text.SentencePieceModel
     mindspore.dataset.text.SentencePieceVocab
     mindspore.dataset.text.SPieceTokenizerLoadType
     mindspore.dataset.text.SPieceTokenizerOutType
-    mindspore.dataset.text.to_str
-    mindspore.dataset.text.to_bytes
     mindspore.dataset.text.Vectors
     mindspore.dataset.text.Vocab
+    mindspore.dataset.text.to_bytes
+    mindspore.dataset.text.to_str
@@ -25,8 +25,8 @@ Transforms
     mindspore.dataset.transforms.TypeCast
     mindspore.dataset.transforms.Unique
 
-Others
-------
+Utilities
+---------
 
 .. autosummary::
     :toctree: dataset_transforms
@@ -77,8 +77,8 @@ Transforms
     mindspore.dataset.vision.UniformAugment
     mindspore.dataset.vision.VerticalFlip
 
-Others
-------
+Utilities
+---------
 
 .. autosummary::
     :toctree: dataset_vision
@@ -91,3 +91,5 @@ Others
     mindspore.dataset.vision.ImageBatchFormat
     mindspore.dataset.vision.Inter
     mindspore.dataset.vision.SliceMode
+    mindspore.dataset.vision.get_image_num_channels
+    mindspore.dataset.vision.get_image_size
@@ -37,12 +37,13 @@ Descriptions of common data processing terms are as follows:
 - TensorOperation, the base class of all data processing operations implemented in C++.
 - AudioTensorOperation, the base class of all audio processing operations. It is a derived class of TensorOperation.
 """
+from . import transforms
+from . import utils
 from .transforms import AllpassBiquad, AmplitudeToDB, Angle, BandBiquad, BandpassBiquad, BandrejectBiquad, BassBiquad, \
     Biquad, ComplexNorm, ComputeDeltas, Contrast, DBToAmplitude, DCShift, DeemphBiquad, DetectPitchFrequency, Dither, \
     EqualizerBiquad, Fade, Flanger, FrequencyMasking, Gain, GriffinLim, HighpassBiquad, InverseMelScale, LFilter, \
     LowpassBiquad, Magphase, MaskAlongAxis, MaskAlongAxisIID, MelScale, MuLawDecoding, MuLawEncoding, Overdrive, \
     Phaser, PhaseVocoder, Resample, RiaaBiquad, SlidingWindowCmn, SpectralCentroid, Spectrogram, TimeMasking, \
-    TimeStretch, TrebleBiquad, Vol
-from . import transforms
-from .utils import create_dct, melscale_fbanks, BorderType, DensityFunction, FadeShape, GainType, Interpolation, \
-    MelType, Modulation, NormMode, NormType, ResampleMethod, ScaleType, WindowType
+    TimeStretch, TrebleBiquad, Vad, Vol
+from .utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, MelType, Modulation, NormMode, \
    NormType, ResampleMethod, ScaleType, WindowType, create_dct, melscale_fbanks
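This hunk adds `Vad` to the package exports. A hedged usage sketch (only `sample_rate` is passed; the remaining `Vad` parameters are assumed to have defaults, and eager invocation on a NumPy array is an assumption):

```python
import numpy as np
import mindspore.dataset.audio as audio

# Fake 16 kHz mono speech with half a second of leading silence.
waveform = np.concatenate([np.zeros(8000, dtype=np.float32),
                           np.random.uniform(-1.0, 1.0, 16000).astype(np.float32)])

# Voice activity detection trims the silence at the beginning of the clip.
vad = audio.Vad(sample_rate=16000)
voiced = vad(waveform)
print(voiced.shape)  # shorter than the input once leading silence is removed
```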
@@ -30,22 +30,14 @@ Descriptions of common data processing terms are as follows:
 - TextTensorOperation, the base class of all text processing operations. It is a derived class of TensorOperation.
 """
 import platform
-from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, \
-    TruncateSequencePair, ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer, ToVectors
-from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
-    SPieceTokenizerOutType, SPieceTokenizerLoadType, Vectors, FastText, GloVe, CharNGram
-
-__all__ = [
-    "Lookup", "JiebaTokenizer", "UnicodeCharTokenizer", "Ngram",
-    "to_str", "to_bytes", "Vocab", "WordpieceTokenizer", "TruncateSequencePair", "ToNumber",
-    "PythonTokenizer", "SlidingWindow", "SentencePieceVocab", "SentencePieceTokenizer", "SPieceTokenizerOutType",
-    "SentencePieceModel", "SPieceTokenizerLoadType", "JiebaMode", "NormalizeForm", "Vectors", "ToVectors", "FastText",
-    "GloVe", "CharNGram"
-]
+from . import transforms
+from . import utils
+from .transforms import JiebaTokenizer, Lookup, Ngram, PythonTokenizer, SentencePieceTokenizer, SlidingWindow, \
+    ToNumber, ToVectors, TruncateSequencePair, UnicodeCharTokenizer, WordpieceTokenizer
+from .utils import CharNGram, FastText, GloVe, JiebaMode, NormalizeForm, SentencePieceModel, SentencePieceVocab, \
+    SPieceTokenizerLoadType, SPieceTokenizerOutType, Vectors, Vocab, to_bytes, to_str
 
 if platform.system().lower() != 'windows':
-    from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \
-        RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer
-
-    __all__.extend(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8",
-                    "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"])
+    from .transforms import BasicTokenizer, BertTokenizer, CaseFold, FilterWikipediaXML, NormalizeUTF8, RegexReplace, \
+        RegexTokenizer, UnicodeScriptTokenizer, WhitespaceTokenizer
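With the reorganized exports, transforms are reached directly through `mindspore.dataset.text`. A hedged pipeline sketch (it assumes `NumpySlicesDataset` accepts Python string lists for a text column):

```python
import mindspore.dataset as ds
import mindspore.dataset.text as text

# A tiny in-memory corpus; NumpySlicesDataset avoids touching the filesystem.
data = ds.NumpySlicesDataset({"text": ["hello world", "mindspore dataset"]}, shuffle=False)

# UnicodeCharTokenizer splits each string into its individual characters.
data = data.map(operations=text.UnicodeCharTokenizer(), input_columns=["text"])

for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(row["text"])
```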
@@ -362,6 +362,43 @@ class Ngram(TextTensorOperation):
         return cde.NgramOperation(self.ngrams, self.left_pad, self.right_pad, self.separator)
 
 
+class PythonTokenizer:
+    """
+    Class that applies a user-defined string tokenizer to the input string.
+
+    Args:
+        tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens.
+
+    Raises:
+        TypeError: If `tokenizer` is not a callable Python function.
+
+    Supported Platforms:
+        ``CPU``
+
+    Examples:
+        >>> def my_tokenizer(line):
+        ...     return line.split()
+        >>> text_file_dataset = text_file_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
+    """
+
+    @check_python_tokenizer
+    def __init__(self, tokenizer):
+        self.pyfunc = tokenizer
+        self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)')
+        self.random = False
+
+    def __call__(self, in_array):
+        if not isinstance(in_array, np.ndarray):
+            raise TypeError("input should be a NumPy array. Got {}.".format(type(in_array)))
+        if in_array.dtype.type is np.bytes_:
+            in_array = to_str(in_array)
+        try:
+            tokens = self.tokenizer(in_array)
+        except Exception as e:
+            raise RuntimeError("Error occurred in Pyfunc [" + str(self.pyfunc.__name__) + "], error message: " + str(e))
+        return tokens
+
+
 class SentencePieceTokenizer(TextTensorOperation):
     """
     Tokenize scalar token or 1-D tokens to tokens by sentencepiece.
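The implementation above is self-contained, so `PythonTokenizer` can also be exercised outside a pipeline; per its `__call__`, the input must be a NumPy array (bytes inputs are decoded first):

```python
import numpy as np
import mindspore.dataset.text as text

def my_tokenizer(line):
    # Plain whitespace split; any str -> list[str] callable works.
    return line.split()

op = text.PythonTokenizer(my_tokenizer)
print(op(np.array("hello mindspore world")))  # ['hello' 'mindspore' 'world']
```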
@@ -653,43 +690,6 @@ class WordpieceTokenizer(TextTensorOperation):
                                               self.unknown_token, self.with_offsets)
 
 
-class PythonTokenizer:
-    """
-    Class that applies a user-defined string tokenizer to the input string.
-
-    Args:
-        tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens.
-
-    Raises:
-        TypeError: If `tokenizer` is not a callable Python function.
-
-    Supported Platforms:
-        ``CPU``
-
-    Examples:
-        >>> def my_tokenizer(line):
-        ...     return line.split()
-        >>> text_file_dataset = text_file_dataset.map(operations=text.PythonTokenizer(my_tokenizer))
-    """
-
-    @check_python_tokenizer
-    def __init__(self, tokenizer):
-        self.pyfunc = tokenizer
-        self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)')
-        self.random = False
-
-    def __call__(self, in_array):
-        if not isinstance(in_array, np.ndarray):
-            raise TypeError("input should be a NumPy array. Got {}.".format(type(in_array)))
-        if in_array.dtype.type is np.bytes_:
-            in_array = to_str(in_array)
-        try:
-            tokens = self.tokenizer(in_array)
-        except Exception as e:
-            raise RuntimeError("Error occurred in Pyfunc [" + str(self.pyfunc.__name__) + "], error message: " + str(e))
-        return tokens
-
-
 if platform.system().lower() != 'windows':
     DE_C_INTER_NORMALIZE_FORM = {
         NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,
@@ -26,9 +26,316 @@ from .validators import check_vocab, check_from_file, check_from_list, check_fro
     check_from_dataset_sentencepiece, check_from_file_sentencepiece, check_save_model, \
     check_from_file_vectors, check_tokens_to_ids, check_ids_to_tokens
 
 __all__ = [
     "Vocab", "SentencePieceVocab", "to_str", "to_bytes", "Vectors", "FastText", "GloVe", "CharNGram"
 ]
 
+
+class CharNGram(cde.CharNGram):
+    """
+    CharNGram object that is used to map tokens into pre-trained vectors.
+    """
+
+    @classmethod
+    @check_from_file_vectors
+    def from_file(cls, file_path, max_vectors=None):
+        """
+        Build a CharNGram vector from a file.
+
+        Args:
+            file_path (str): Path of the file that contains the CharNGram vectors.
+            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
+                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
+                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
+
+        Examples:
+            >>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
+        """
+
+        max_vectors = max_vectors if max_vectors is not None else 0
+        return super().from_file(file_path, max_vectors)
+
+
+class FastText(cde.FastText):
+    """
+    FastText object that is used to map tokens into vectors.
+    """
+
+    @classmethod
+    @check_from_file_vectors
+    def from_file(cls, file_path, max_vectors=None):
+        """
+        Build a FastText vector from a file.
+
+        Args:
+            file_path (str): Path of the file that contains the vectors. The suffix of pre-trained vector sets
+                must be `*.vec`.
+            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
+                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
+                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
+
+        Examples:
+            >>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
+        """
+
+        max_vectors = max_vectors if max_vectors is not None else 0
+        return super().from_file(file_path, max_vectors)
+
+
+class GloVe(cde.GloVe):
+    """
+    GloVe object that is used to map tokens into vectors.
+    """
+
+    @classmethod
+    @check_from_file_vectors
+    def from_file(cls, file_path, max_vectors=None):
+        """
+        Build a GloVe vector from a file.
+
+        Args:
+            file_path (str): Path of the file that contains the vectors. The format of pre-trained vector sets
+                must be `glove.6B.*.txt`.
+            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
+                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
+                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
+
+        Examples:
+            >>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
+        """
+
+        max_vectors = max_vectors if max_vectors is not None else 0
+        return super().from_file(file_path, max_vectors)
+
+
+class JiebaMode(IntEnum):
+    """
+    An enumeration for JiebaTokenizer.
+
+    Possible enumeration values are: JiebaMode.MIX, JiebaMode.MP, JiebaMode.HMM.
+
+    - JiebaMode.MIX: tokenize with a mix of MPSegment and HMMSegment algorithm.
+    - JiebaMode.MP: tokenize with MPSegment algorithm.
+    - JiebaMode.HMM: tokenize with Hidden Markov Model Segment algorithm.
+    """
+
+    MIX = 0
+    MP = 1
+    HMM = 2
+
+
+class NormalizeForm(IntEnum):
+    """
+    Enumeration class for `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
+
+    Possible enumeration values are: NormalizeForm.NONE, NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD
+    and NormalizeForm.NFKD.
+
+    - NormalizeForm.NONE: no normalization.
+    - NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
+    - NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
+    - NormalizeForm.NFD: Canonical Decomposition.
+    - NormalizeForm.NFKD: Compatibility Decomposition.
+    """
+
+    NONE = 0
+    NFC = 1
+    NFKC = 2
+    NFD = 3
+    NFKD = 4
+
+
+class SentencePieceModel(IntEnum):
+    """
+    An enumeration for SentencePieceModel.
+
+    Possible enumeration values are: SentencePieceModel.UNIGRAM, SentencePieceModel.BPE, SentencePieceModel.CHAR,
+    SentencePieceModel.WORD.
+
+    - SentencePieceModel.UNIGRAM: Unigram Language Model means the next word in the sentence is assumed to be
+      independent of the previous words generated by the model.
+    - SentencePieceModel.BPE: refers to byte pair encoding algorithm, which replaces the most frequent pair of bytes in
+      a sentence with a single, unused byte.
+    - SentencePieceModel.CHAR: refers to char based sentencePiece Model type.
+    - SentencePieceModel.WORD: refers to word based sentencePiece Model type.
+    """
+
+    UNIGRAM = 0
+    BPE = 1
+    CHAR = 2
+    WORD = 3
+
+
+DE_C_INTER_SENTENCEPIECE_MODE = {
+    SentencePieceModel.UNIGRAM: cde.SentencePieceModel.DE_SENTENCE_PIECE_UNIGRAM,
+    SentencePieceModel.BPE: cde.SentencePieceModel.DE_SENTENCE_PIECE_BPE,
+    SentencePieceModel.CHAR: cde.SentencePieceModel.DE_SENTENCE_PIECE_CHAR,
+    SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
+}
+
+
+class SentencePieceVocab:
+    """
+    SentencePiece object that is used to do words segmentation.
+    """
+
+    def __init__(self):
+        self.c_sentence_piece_vocab = None
+
+    @classmethod
+    @check_from_dataset_sentencepiece
+    def from_dataset(cls, dataset, col_names, vocab_size, character_coverage, model_type, params):
+        """
+        Build a SentencePiece from a dataset.
+
+        Args:
+            dataset (Dataset): Dataset to build SentencePiece.
+            col_names (list): The list of column names.
+            vocab_size (int): Vocabulary size.
+            character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
+                languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
+                character set.
+            model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
+                SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
+                sentence must be pre-tokenized when using SentencePieceModel.WORD type.
+
+                - SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
+                  be independent of the previous words generated by the model.
+                - SentencePieceModel.BPE, refers to byte pair encoding algorithm, which replaces the most frequent pair
+                  of bytes in a sentence with a single, unused byte.
+                - SentencePieceModel.CHAR, refers to char based sentencePiece Model type.
+                - SentencePieceModel.WORD, refers to word based sentencePiece Model type.
+
+            params (dict): A dictionary with no incoming parameters.
+
+        Returns:
+            SentencePieceVocab, vocab built from the dataset.
+
+        Examples:
+            >>> from mindspore.dataset.text import SentencePieceModel
+            >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
+            >>> vocab = text.SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
+            ...                                              SentencePieceModel.UNIGRAM, {})
+        """
+
+        sentence_piece_vocab = cls()
+        sentence_piece_vocab.c_sentence_piece_vocab = dataset.build_sentencepiece_vocab(col_names, vocab_size,
+                                                                                        character_coverage,
+                                                                                        model_type, params)
+        return sentence_piece_vocab
+
+    @classmethod
+    @check_from_file_sentencepiece
+    def from_file(cls, file_path, vocab_size, character_coverage, model_type, params):
+        """
+        Build a SentencePiece object from a file.
+
+        Args:
+            file_path (list): Path to the file which contains the SentencePiece list.
+            vocab_size (int): Vocabulary size.
+            character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
+                languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
+                character set.
+            model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
+                SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
+                sentence must be pre-tokenized when using SentencePieceModel.WORD type.
+
+                - SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
+                  be independent of the previous words generated by the model.
+                - SentencePieceModel.BPE, refers to byte pair encoding algorithm, which replaces the most frequent pair
+                  of bytes in a sentence with a single, unused byte.
+                - SentencePieceModel.CHAR, refers to char based sentencePiece Model type.
+                - SentencePieceModel.WORD, refers to word based sentencePiece Model type.
+
+            params (dict): A dictionary with no incoming parameters (the parameters are derived from the SentencePiece
+                library).
+
+        Returns:
+            SentencePieceVocab, vocab built from the file.
+
+        Examples:
+            >>> from mindspore.dataset.text import SentencePieceModel
+            >>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
+            ...                                           SentencePieceModel.UNIGRAM, {})
+        """
+
+        sentence_piece_vocab = cls()
+        sentence_piece_vocab.c_sentence_piece_vocab = cde.SentencePieceVocab.from_file(
+            file_path, vocab_size, character_coverage, DE_C_INTER_SENTENCEPIECE_MODE.get(model_type), params)
+        return sentence_piece_vocab
+
+    @classmethod
+    @check_save_model
+    def save_model(cls, vocab, path, filename):
+        """
+        Save model into given filepath.
+
+        Args:
+            vocab (SentencePieceVocab): A SentencePiece object.
+            path (str): Path to store model.
+            filename (str): The name of the file.
+
+        Examples:
+            >>> from mindspore.dataset.text import SentencePieceModel
+            >>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
+            ...                                           SentencePieceModel.UNIGRAM, {})
+            >>> text.SentencePieceVocab.save_model(vocab, "./", "m.model")
+        """
+
+        cde.SentencePieceVocab.save_model(vocab.c_sentence_piece_vocab, path, filename)
+
+
+class SPieceTokenizerLoadType(IntEnum):
+    """
+    An enumeration for loading type of SentencePieceTokenizer.
+
+    Possible enumeration values are: SPieceTokenizerLoadType.FILE, SPieceTokenizerLoadType.MODEL.
+
+    - SPieceTokenizerLoadType.FILE: Load SentencePiece tokenizer from a Vocab file.
+    - SPieceTokenizerLoadType.MODEL: Load SentencePiece tokenizer from a SentencePieceVocab object.
+    """
+
+    FILE = 0
+    MODEL = 1
+
+
+class SPieceTokenizerOutType(IntEnum):
+    """
+    An enumeration for SPieceTokenizerOutType.
+
+    Possible enumeration values are: SPieceTokenizerOutType.STRING, SPieceTokenizerOutType.INT.
+
+    - SPieceTokenizerOutType.STRING: means output type of SentencePiece Tokenizer is string.
+    - SPieceTokenizerOutType.INT: means output type of SentencePiece Tokenizer is int.
+    """
+
+    STRING = 0
+    INT = 1
+
+
+class Vectors(cde.Vectors):
+    """
+    Vectors object that is used to map tokens into vectors.
+    """
+
+    @classmethod
+    @check_from_file_vectors
+    def from_file(cls, file_path, max_vectors=None):
+        """
+        Build a vector from a file.
+
+        Args:
+            file_path (str): Path of the file that contains the vectors.
+            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
+                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
+                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
+                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
+
+        Examples:
+            >>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
+        """
+
+        max_vectors = max_vectors if max_vectors is not None else 0
+        return super().from_file(file_path, max_vectors)
+
+
 class Vocab:
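A hedged sketch tying `Vectors.from_file` (shown above) to the `ToVectors` transform from the same package; the file path is hypothetical, and any `ToVectors` parameters beyond its first argument are assumed:

```python
import mindspore.dataset.text as text

# Hypothetical path: a plain-text file of "token v1 v2 ... vN" lines.
vectors = text.Vectors.from_file("/path/to/vectors/file", max_vectors=10000)

# ToVectors looks each token up in the loaded table; behavior for unknown
# tokens depends on its defaults (assumption).
to_vectors = text.ToVectors(vectors)
```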
@@ -41,66 +348,6 @@ class Vocab:
     def __init__(self):
         self.c_vocab = None
 
-    def vocab(self):
-        """
-        Get the vocabulary table in dict type.
-
-        Returns:
-            A vocabulary consisting of word and id pairs.
-
-        Examples:
-            >>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
-            >>> vocabulary_dict = vocab.vocab()
-        """
-        check_vocab(self.c_vocab)
-        return self.c_vocab.vocab()
-
-    @check_tokens_to_ids
-    def tokens_to_ids(self, tokens):
-        """
-        Converts a token string or a sequence of tokens into a single integer id or a sequence of ids.
-        If token does not exist, return id with value -1.
-
-        Args:
-            tokens (Union[str, list[str]]): One or several token(s) to convert to token id(s).
-
-        Returns:
-            The token id or list of token ids.
-
-        Examples:
-            >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
-            >>> ids = vocab.tokens_to_ids(["w1", "w3"])
-        """
-        check_vocab(self.c_vocab)
-        if isinstance(tokens, np.ndarray):
-            tokens = tokens.tolist()
-        if isinstance(tokens, str):
-            tokens = [tokens]
-        return self.c_vocab.tokens_to_ids(tokens)
-
-    @check_ids_to_tokens
-    def ids_to_tokens(self, ids):
-        """
-        Converts a single index or a sequence of indices into a token or a sequence of tokens.
-        If id does not exist, return empty string.
-
-        Args:
-            ids (Union[int, list[int]]): The token id (or token ids) to convert to tokens.
-
-        Returns:
-            The decoded token(s).
-
-        Examples:
-            >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
-            >>> token = vocab.ids_to_tokens(0)
-        """
-        check_vocab(self.c_vocab)
-        if isinstance(ids, np.ndarray):
-            ids = ids.tolist()
-        if isinstance(ids, int):
-            ids = [ids]
-        return self.c_vocab.ids_to_tokens(ids)
-
     @classmethod
     @check_from_dataset
     def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, special_first=True):
@@ -236,141 +483,65 @@ class Vocab:
         vocab.c_vocab = cde.Vocab.from_dict(word_dict)
         return vocab
 
 
-class SentencePieceVocab:
-    """
-    SentencePiece object that is used to do words segmentation.
-    """
-
-    def __init__(self):
-        self.c_sentence_piece_vocab = None
-
-    @classmethod
-    @check_from_dataset_sentencepiece
-    def from_dataset(cls, dataset, col_names, vocab_size, character_coverage, model_type, params):
+    def vocab(self):
         """
-        Build a SentencePiece from a dataset.
-
-        Args:
-            dataset (Dataset): Dataset to build SentencePiece.
-            col_names (list): The list of column names.
-            vocab_size (int): Vocabulary size.
-            character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
-                languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
-                character set.
-            model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
-                SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
-                sentence must be pre-tokenized when using SentencePieceModel.WORD type.
-
-                - SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
-                  be independent of the previous words generated by the model.
-                - SentencePieceModel.BPE, refers to byte pair encoding algorithm, which replaces the most frequent pair
-                  of bytes in a sentence with a single, unused byte.
-                - SentencePieceModel.CHAR, refers to char based sentencePiece Model type.
-                - SentencePieceModel.WORD, refers to word based sentencePiece Model type.
-
-            params (dict): A dictionary with no incoming parameters.
+        Get the vocabulary table in dict type.
 
         Returns:
-            SentencePieceVocab, vocab built from the dataset.
+            A vocabulary consisting of word and id pairs.
 
         Examples:
-            >>> from mindspore.dataset.text import SentencePieceModel
-            >>> dataset = ds.TextFileDataset("/path/to/sentence/piece/vocab/file", shuffle=False)
-            >>> vocab = text.SentencePieceVocab.from_dataset(dataset, ["text"], 5000, 0.9995,
-            ...                                              SentencePieceModel.UNIGRAM, {})
+            >>> vocab = text.Vocab.from_list(["word_1", "word_2", "word_3", "word_4"])
+            >>> vocabulary_dict = vocab.vocab()
         """
+        check_vocab(self.c_vocab)
+        return self.c_vocab.vocab()
 
-        sentence_piece_vocab = cls()
-        sentence_piece_vocab.c_sentence_piece_vocab = dataset.build_sentencepiece_vocab(col_names, vocab_size,
-                                                                                        character_coverage,
-                                                                                        model_type, params)
-        return sentence_piece_vocab
-
-    @classmethod
-    @check_from_file_sentencepiece
-    def from_file(cls, file_path, vocab_size, character_coverage, model_type, params):
+    @check_tokens_to_ids
+    def tokens_to_ids(self, tokens):
         """
-        Build a SentencePiece object from a file.
+        Converts a token string or a sequence of tokens into a single integer id or a sequence of ids.
+        If token does not exist, return id with value -1.
 
         Args:
-            file_path (list): Path to the file which contains the SentencePiece list.
-            vocab_size (int): Vocabulary size.
-            character_coverage (float): Amount of characters covered by the model, good defaults are: 0.9995 for
-                languages with rich character set like Japanese or Chinese and 1.0 for other languages with small
-                character set.
-            model_type (SentencePieceModel): It can be any of [SentencePieceModel.UNIGRAM, SentencePieceModel.BPE,
-                SentencePieceModel.CHAR, SentencePieceModel.WORD], default is SentencePieceModel.UNIGRAM. The input
-                sentence must be pre-tokenized when using SentencePieceModel.WORD type.
-
-                - SentencePieceModel.UNIGRAM, Unigram Language Model means the next word in the sentence is assumed to
-                  be independent of the previous words generated by the model.
-                - SentencePieceModel.BPE, refers to byte pair encoding algorithm, which replaces the most frequent pair
-                  of bytes in a sentence with a single, unused byte.
-                - SentencePieceModel.CHAR, refers to char based sentencePiece Model type.
-                - SentencePieceModel.WORD, refers to word based sentencePiece Model type.
-
-            params (dict): A dictionary with no incoming parameters (the parameters are derived from the SentencePiece
-                library).
+            tokens (Union[str, list[str]]): One or several token(s) to convert to token id(s).
 
         Returns:
-            SentencePieceVocab, vocab built from the file.
+            The token id or list of token ids.
 
         Examples:
-            >>> from mindspore.dataset.text import SentencePieceModel
-            >>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
-            ...                                           SentencePieceModel.UNIGRAM, {})
+            >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
+            >>> ids = vocab.tokens_to_ids(["w1", "w3"])
         """
+        check_vocab(self.c_vocab)
+        if isinstance(tokens, np.ndarray):
+            tokens = tokens.tolist()
+        if isinstance(tokens, str):
+            tokens = [tokens]
+        return self.c_vocab.tokens_to_ids(tokens)
 
-        sentence_piece_vocab = cls()
-        sentence_piece_vocab.c_sentence_piece_vocab = \
-            cde.SentencePieceVocab.from_file(file_path, vocab_size, character_coverage,
-                                             DE_C_INTER_SENTENCEPIECE_MODE[model_type], params)
-        return sentence_piece_vocab
-
-    @classmethod
-    @check_save_model
-    def save_model(cls, vocab, path, filename):
+    @check_ids_to_tokens
+    def ids_to_tokens(self, ids):
         """
-        Save model into given filepath.
+        Converts a single index or a sequence of indices into a token or a sequence of tokens.
+        If id does not exist, return empty string.
 
         Args:
-            vocab (SentencePieceVocab): A SentencePiece object.
-            path (str): Path to store model.
-            filename (str): The name of the file.
+            ids (Union[int, list[int]]): The token id (or token ids) to convert to tokens.
+
+        Returns:
+            The decoded token(s).
 
         Examples:
-            >>> from mindspore.dataset.text import SentencePieceModel
-            >>> vocab = text.SentencePieceVocab.from_file(["/path/to/sentence/piece/vocab/file"], 5000, 0.9995,
-            ...                                           SentencePieceModel.UNIGRAM, {})
-            >>> text.SentencePieceVocab.save_model(vocab, "./", "m.model")
+            >>> vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
+            >>> token = vocab.ids_to_tokens(0)
         """
-
-        cde.SentencePieceVocab.save_model(vocab.c_sentence_piece_vocab, path, filename)
-
-
-def to_str(array, encoding='utf8'):
-    """
-    Convert NumPy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.
-
-    Args:
-        array (numpy.ndarray): Array of `bytes` type representing strings.
-        encoding (str): Indicating the charset for decoding (default='utf8').
-
-    Returns:
-        numpy.ndarray, NumPy array of `str`.
-
-    Examples:
-        >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"]
-        >>> dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir, shuffle=False)
-        >>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
-        ...     data = text.to_str(item["text"])
-    """
-
-    if not isinstance(array, np.ndarray):
-        raise TypeError('input should be a NumPy array.')
-
-    return np.char.decode(array, encoding)
+        check_vocab(self.c_vocab)
+        if isinstance(ids, np.ndarray):
+            ids = ids.tolist()
+        if isinstance(ids, int):
+            ids = [ids]
+        return self.c_vocab.ids_to_tokens(ids)
 
 
 def to_bytes(array, encoding='utf8'):
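The relocated `Vocab` methods round-trip as follows (built from the docstring examples above; with `special_first=True`, `<unk>` takes id 0):

```python
import mindspore.dataset.text as text

vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)

ids = vocab.tokens_to_ids(["w1", "w3"])  # [1, 3]; a missing token returns -1
tokens = vocab.ids_to_tokens(ids)        # ['w1', 'w3']; a missing id returns ''
print(vocab.vocab())                     # {'<unk>': 0, 'w1': 1, 'w2': 2, 'w3': 3}
```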
@@ -398,201 +569,25 @@ def to_bytes(array, encoding='utf8'):
     return np.char.encode(array, encoding)
 
 
-class JiebaMode(IntEnum):
+def to_str(array, encoding='utf8'):
     """
-    An enumeration for JiebaTokenizer.
+    Convert NumPy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.
 
-    Possible enumeration values are: JiebaMode.MIX, JiebaMode.MP, JiebaMode.HMM.
+    Args:
+        array (numpy.ndarray): Array of `bytes` type representing strings.
+        encoding (str): Indicating the charset for decoding (default='utf8').
 
-    - JiebaMode.MIX: tokenize with a mix of MPSegment and HMMSegment algorithm.
-    - JiebaMode.MP: tokenize with MPSegment algorithm.
-    - JiebaMode.HMM: tokenize with Hidden Markov Model Segment algorithm.
+    Returns:
+        numpy.ndarray, NumPy array of `str`.
+
+    Examples:
+        >>> text_file_dataset_dir = ["/path/to/text_file_dataset_file"]
+        >>> dataset = ds.TextFileDataset(dataset_files=text_file_dataset_dir, shuffle=False)
+        >>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
+        ...     data = text.to_str(item["text"])
     """
 
-    MIX = 0
-    MP = 1
-    HMM = 2
+    if not isinstance(array, np.ndarray):
+        raise TypeError('input should be a NumPy array.')
-
-
-class NormalizeForm(IntEnum):
-    """
-    Enumeration class for `Unicode normalization forms <http://unicode.org/reports/tr15/>`_ .
-
-    Possible enumeration values are: NormalizeForm.NONE, NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD
-    and NormalizeForm.NFKD.
-
-    - NormalizeForm.NONE: no normalization.
-    - NormalizeForm.NFC: Canonical Decomposition, followed by Canonical Composition.
-    - NormalizeForm.NFKC: Compatibility Decomposition, followed by Canonical Composition.
-    - NormalizeForm.NFD: Canonical Decomposition.
-    - NormalizeForm.NFKD: Compatibility Decomposition.
-    """
-
-    NONE = 0
-    NFC = 1
-    NFKC = 2
-    NFD = 3
-    NFKD = 4
-
-
-class SentencePieceModel(IntEnum):
-    """
-    An enumeration for SentencePieceModel.
-
-    Possible enumeration values are: SentencePieceModel.UNIGRAM, SentencePieceModel.BPE, SentencePieceModel.CHAR,
-    SentencePieceModel.WORD.
-
-    - SentencePieceModel.UNIGRAM: Unigram Language Model means the next word in the sentence is assumed to be
-      independent of the previous words generated by the model.
-    - SentencePieceModel.BPE: refers to byte pair encoding algorithm, which replaces the most frequent pair of bytes in
-      a sentence with a single, unused byte.
-    - SentencePieceModel.CHAR: refers to char based sentencePiece Model type.
-    - SentencePieceModel.WORD: refers to word based sentencePiece Model type.
-    """
-
-    UNIGRAM = 0
-    BPE = 1
-    CHAR = 2
-    WORD = 3
-
-
-DE_C_INTER_SENTENCEPIECE_MODE = {
-    SentencePieceModel.UNIGRAM: cde.SentencePieceModel.DE_SENTENCE_PIECE_UNIGRAM,
-    SentencePieceModel.BPE: cde.SentencePieceModel.DE_SENTENCE_PIECE_BPE,
-    SentencePieceModel.CHAR: cde.SentencePieceModel.DE_SENTENCE_PIECE_CHAR,
-    SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
-}
-
-
-class SPieceTokenizerOutType(IntEnum):
-    """
-    An enumeration for SPieceTokenizerOutType.
-
-    Possible enumeration values are: SPieceTokenizerOutType.STRING, SPieceTokenizerOutType.INT.
-
-    - SPieceTokenizerOutType.STRING: means output type of SentencePiece Tokenizer is string.
-    - SPieceTokenizerOutType.INT: means output type of SentencePiece Tokenizer is int.
-    """
-
-    STRING = 0
-    INT = 1
-
-
-class SPieceTokenizerLoadType(IntEnum):
-    """
-    An enumeration for loading type of SentencePieceTokenizer.
-
-    Possible enumeration values are: SPieceTokenizerLoadType.FILE, SPieceTokenizerLoadType.MODEL.
-
-    - SPieceTokenizerLoadType.FILE: Load SentencePiece tokenizer from a Vocab file.
-    - SPieceTokenizerLoadType.MODEL: Load SentencePiece tokenizer from a SentencePieceVocab object.
-    """
-
-    FILE = 0
-    MODEL = 1
-
-
-class Vectors(cde.Vectors):
-    """
-    Vectors object that is used to map tokens into vectors.
-    """
-
-    @classmethod
-    @check_from_file_vectors
-    def from_file(cls, file_path, max_vectors=None):
-        """
-        Build a vector from a file.
-
-        Args:
-            file_path (str): Path of the file that contains the vectors.
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
-                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
-                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
-
-        Examples:
-            >>> vector = text.Vectors.from_file("/path/to/vectors/file", max_vectors=None)
-        """
-
-        max_vectors = max_vectors if max_vectors is not None else 0
-        return super().from_file(file_path, max_vectors)
-
-
-class FastText(cde.FastText):
-    """
-    FastText object that is used to map tokens into vectors.
-    """
-
-    @classmethod
-    @check_from_file_vectors
-    def from_file(cls, file_path, max_vectors=None):
-        """
-        Build a FastText vector from a file.
-
-        Args:
-            file_path (str): Path of the file that contains the vectors. The suffix of pre-trained vector sets
-                must be `*.vec`.
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
-                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
-                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
-
-        Examples:
-            >>> fast_text = text.FastText.from_file("/path/to/fast_text/file", max_vectors=None)
-        """
-
-        max_vectors = max_vectors if max_vectors is not None else 0
-        return super().from_file(file_path, max_vectors)
-
-
-class GloVe(cde.GloVe):
-    """
-    GloVe object that is used to map tokens into vectors.
-    """
-
-    @classmethod
-    @check_from_file_vectors
-    def from_file(cls, file_path, max_vectors=None):
-        """
-        Build a GloVe vector from a file.
-
-        Args:
-            file_path (str): Path of the file that contains the vectors. The format of pre-trained vector sets
-                must be `glove.6B.*.txt`.
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
-                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
-                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
-
-        Examples:
-            >>> glove = text.GloVe.from_file("/path/to/glove/file", max_vectors=None)
-        """
-
-        max_vectors = max_vectors if max_vectors is not None else 0
-        return super().from_file(file_path, max_vectors)
-
-
-class CharNGram(cde.CharNGram):
-    """
-    CharNGram object that is used to map tokens into pre-trained vectors.
-    """
-
-    @classmethod
-    @check_from_file_vectors
-    def from_file(cls, file_path, max_vectors=None):
-        """
-        Build a CharNGram vector from a file.
-
-        Args:
-            file_path (str): Path of the file that contains the CharNGram vectors.
-            max_vectors (int, optional): This can be used to limit the number of pre-trained vectors loaded.
-                Most pre-trained vector sets are sorted in the descending order of word frequency. Thus, in
-                situations where the entire set doesn’t fit in memory, or is not needed for another reason,
-                passing `max_vectors` can limit the size of the loaded set (default=None, no limit).
-
-        Examples:
-            >>> char_n_gram = text.CharNGram.from_file("/path/to/char_n_gram/file", max_vectors=None)
-        """
-
-        max_vectors = max_vectors if max_vectors is not None else 0
-        return super().from_file(file_path, max_vectors)
 
+    return np.char.decode(array, encoding)
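`to_str` and `to_bytes` are thin wrappers over `np.char.decode` and `np.char.encode`, so a round trip looks like:

```python
import numpy as np
import mindspore.dataset.text as text

raw = np.array([b"hello", b"world"])  # NumPy array of bytes
decoded = text.to_str(raw)            # array(['hello', 'world'], dtype='<U5')
encoded = text.to_bytes(decoded)      # back to bytes via np.char.encode
print(decoded, encoded)
```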
@@ -39,5 +39,5 @@ from .. import vision
 from . import c_transforms
 from . import py_transforms
 from . import transforms
-from .transforms import not_random, Relational, Compose, Concatenate, Duplicate, Fill, Mask, OneHot, PadEnd, Plugin, \
-    RandomApply, RandomChoice, RandomOrder, Slice, TypeCast, Unique
+from .transforms import Compose, Concatenate, Duplicate, Fill, Mask, OneHot, PadEnd, Plugin, RandomApply, \
+    RandomChoice, RandomOrder, Relational, Slice, TypeCast, Unique, not_random
@@ -459,36 +459,6 @@ class Fill(TensorOperation):
         return cde.FillOperation(self.fill_value)
 
 
-class Relational(IntEnum):
-    """
-    Relationship operator.
-
-    Possible enumeration values are: Relational.EQ, Relational.NE, Relational.GT, Relational.GE, Relational.LT,
-    Relational.LE.
-
-    - Relational.EQ: refers to Equality.
-    - Relational.NE: refers to not equal, or Inequality.
-    - Relational.GT: refers to Greater than.
-    - Relational.GE: refers to Greater than or equal to.
-    - Relational.LT: refers to Less than.
-    - Relational.LE: refers to Less than or equal to.
-    """
-    EQ = 0
-    NE = 1
-    GT = 2
-    GE = 3
-    LT = 4
-    LE = 5
-
-
-DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
-                   Relational.NE: cde.RelationalOp.NE,
-                   Relational.GT: cde.RelationalOp.GT,
-                   Relational.GE: cde.RelationalOp.GE,
-                   Relational.LT: cde.RelationalOp.LT,
-                   Relational.LE: cde.RelationalOp.LE}
-
-
 class Mask(TensorOperation):
     r"""
     Mask content of the input tensor with the given predicate.
@ -808,6 +778,36 @@ class RandomOrder(PyTensorOperation):
|
|||
return util.random_order(img, self.transforms)
|
||||
|
||||
|
||||
class Relational(IntEnum):
|
||||
"""
|
||||
Relationship operator.
|
||||
|
||||
Possible enumeration values are: Relational.EQ, Relational.NE, Relational.GT, Relational.GE, Relational.LT,
|
||||
Relational.LE.
|
||||
|
||||
- Relational.EQ: refers to Equality.
|
||||
- Relational.NE: refers not equal, or Inequality.
|
||||
- Relational.GT: refers to Greater than.
|
||||
- Relational.GE: refers to Greater than or equal to.
|
||||
- Relational.LT: refers to Less than.
|
||||
- Relational.LE: refers to Less than or equal to.
|
||||
"""
|
||||
EQ = 0
|
||||
NE = 1
|
||||
GT = 2
|
||||
GE = 3
|
||||
LT = 4
|
||||
LE = 5
|
||||
|
||||
|
||||
DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
|
||||
Relational.NE: cde.RelationalOp.NE,
|
||||
Relational.GT: cde.RelationalOp.GT,
|
||||
Relational.GE: cde.RelationalOp.GE,
|
||||
Relational.LT: cde.RelationalOp.LT,
|
||||
Relational.LE: cde.RelationalOp.LE}
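A short sketch of how `Relational` feeds the `Mask` operation shown above; the sample array and the expected output comment are illustrative, and `Mask` is assumed to default to a boolean output dtype:

>>> import numpy as np
>>> import mindspore.dataset.transforms as transforms
>>> # DE_C_RELATIONAL translates Relational.EQ into the C++ backend predicate.
>>> mask_op = transforms.Mask(transforms.Relational.EQ, 2)
>>> mask_op(np.array([1, 2, 3, 2]))  # expected: [False, True, False, True]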


class _SliceOption(cde.SliceOption):
    """
    Internal class SliceOption to be used with SliceOperation
@@ -39,14 +39,15 @@ Descriptions of common data processing terms are as follows:
from . import c_transforms
from . import py_transforms
from . import transforms
from .transforms import not_random, AdjustGamma, AutoAugment, AutoContrast, BoundingBoxAugment, CenterCrop, \
    ConvertColor, Crop, CutMixBatch, CutOut, Decode, Equalize, FiveCrop, GaussianBlur, Grayscale, HorizontalFlip, \
    HsvToRgb, HWC2CHW, Invert, LinearTransformation, MixUpBatch, MixUp, NormalizePad, Normalize, Pad, PadToSize, \
    RandomAdjustSharpness, RandomAffine, RandomAutoContrast, RandomColorAdjust, RandomColor, RandomCropDecodeResize, \
    RandomCrop, RandomCropWithBBox, RandomEqualize, RandomErasing, RandomGrayscale, RandomHorizontalFlip, \
from . import utils
from .transforms import AdjustGamma, AutoAugment, AutoContrast, BoundingBoxAugment, CenterCrop, ConvertColor, Crop, \
    CutMixBatch, CutOut, Decode, Equalize, FiveCrop, GaussianBlur, Grayscale, HorizontalFlip, HsvToRgb, HWC2CHW, \
    Invert, LinearTransformation, MixUp, MixUpBatch, Normalize, NormalizePad, Pad, PadToSize, RandomAdjustSharpness, \
    RandomAffine, RandomAutoContrast, RandomColor, RandomColorAdjust, RandomCrop, RandomCropDecodeResize, \
    RandomCropWithBBox, RandomEqualize, RandomErasing, RandomGrayscale, RandomHorizontalFlip, \
    RandomHorizontalFlipWithBBox, RandomInvert, RandomLighting, RandomPerspective, RandomPosterize, RandomResizedCrop, \
    RandomResizedCropWithBBox, RandomResize, RandomResizeWithBBox, RandomRotation, RandomSelectSubpolicy, \
    RandomSharpness, RandomSolarize, RandomVerticalFlip, RandomVerticalFlipWithBBox, Rescale, Resize, ResizeWithBBox, \
    RgbToHsv, Rotate, SlicePatches, TenCrop, ToNumpy, ToPIL, ToTensor, ToType, UniformAugment, VerticalFlip
from .utils import Inter, Border, ConvertMode, ImageBatchFormat, SliceMode, AutoAugmentPolicy, get_image_num_channels, \
    RgbToHsv, Rotate, SlicePatches, TenCrop, ToNumpy, ToPIL, ToTensor, ToType, UniformAugment, VerticalFlip, not_random
from .utils import AutoAugmentPolicy, Border, ConvertMode, ImageBatchFormat, Inter, SliceMode, get_image_num_channels, \
    get_image_size
@@ -24,227 +24,6 @@ from mindspore import log as logger
import mindspore._c_dataengine as cde


class Inter(IntEnum):
    """
    Interpolation Modes.

    Possible enumeration values are: Inter.NEAREST, Inter.ANTIALIAS, Inter.LINEAR, Inter.BILINEAR, Inter.CUBIC,
    Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC.

    - Inter.NEAREST: means the interpolation method is nearest-neighbor interpolation.
    - Inter.ANTIALIAS: means the interpolation method is antialias interpolation.
    - Inter.LINEAR: means the interpolation method is bilinear interpolation, an alias of Inter.BILINEAR.
    - Inter.BILINEAR: means the interpolation method is bilinear interpolation.
    - Inter.CUBIC: means the interpolation method is bicubic interpolation, an alias of Inter.BICUBIC.
    - Inter.BICUBIC: means the interpolation method is bicubic interpolation.
    - Inter.AREA: means the interpolation method is pixel area interpolation.
    - Inter.PILCUBIC: means the interpolation method is bicubic interpolation as implemented in Pillow; the input
      should be in 3-channel format.
    """
    NEAREST = 0
    ANTIALIAS = 1
    BILINEAR = LINEAR = 2
    BICUBIC = CUBIC = 3
    AREA = 4
    PILCUBIC = 5

    @staticmethod
    def to_python_type(inter_type):
        """
        Function to return Python type for Interpolation Mode.
        """
        # Compare versions numerically: a plain string comparison would put "10.1.0" before "9.1.0".
        if tuple(int(v) for v in Image.__version__.split(".")[:2]) >= (9, 1):
            python_values = {Inter.NEAREST: Image.Resampling.NEAREST,
                             Inter.ANTIALIAS: Image.Resampling.LANCZOS,
                             Inter.LINEAR: Image.Resampling.BILINEAR,
                             Inter.CUBIC: Image.Resampling.BICUBIC}
        else:
            python_values = {Inter.NEAREST: Image.NEAREST,
                             Inter.ANTIALIAS: Image.ANTIALIAS,
                             Inter.LINEAR: Image.LINEAR,
                             Inter.CUBIC: Image.CUBIC}
        return python_values.get(inter_type)

    @staticmethod
    def to_c_type(inter_type):
        """
        Function to return C type for Interpolation Mode.
        """
        c_values = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                    Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                    Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
                    Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
                    Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC}

        return c_values.get(inter_type)


class Border(str, Enum):
    """
    Padding Mode, Border Type.

    Possible enumeration values are: Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC.

    - Border.CONSTANT: means it fills the border with constant values.
    - Border.EDGE: means it pads with the last value on the edge.
    - Border.REFLECT: means it reflects the values on the edge omitting the last value of edge.
    - Border.SYMMETRIC: means it reflects the values on the edge repeating the last value of edge.

    Note: This class is derived from class str to support JSON serialization.
    """
    CONSTANT: str = "constant"
    EDGE: str = "edge"
    REFLECT: str = "reflect"
    SYMMETRIC: str = "symmetric"

    @staticmethod
    def to_python_type(border_type):
        """
        Function to return Python type for Border Type.
        """
        python_values = {Border.CONSTANT: 'constant',
                         Border.EDGE: 'edge',
                         Border.REFLECT: 'reflect',
                         Border.SYMMETRIC: 'symmetric'}
        return python_values.get(border_type)

    @staticmethod
    def to_c_type(border_type):
        """
        Function to return C type for Border Type.
        """
        c_values = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

        return c_values.get(border_type)


class ImageBatchFormat(IntEnum):
    """
    Data Format of images after batch operation.

    Possible enumeration values are: ImageBatchFormat.NHWC, ImageBatchFormat.NCHW.

    - ImageBatchFormat.NHWC: stores the data in the order of batch N, height H, width W, channels C.
    - ImageBatchFormat.NCHW: stores the data in the order of batch N, channels C, height H, width W.
    """
    NHWC = 0
    NCHW = 1

    @staticmethod
    def to_c_type(image_batch_format):
        """
        Function to return C type for ImageBatchFormat.
        """
        c_values = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                    ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

        return c_values.get(image_batch_format)


class ConvertMode(IntEnum):
    """
    The color conversion mode.

    Possible enumeration values are as follows:

    - ConvertMode.COLOR_BGR2BGRA: convert BGR format images to BGRA format images.
    - ConvertMode.COLOR_RGB2RGBA: convert RGB format images to RGBA format images.
    - ConvertMode.COLOR_BGRA2BGR: convert BGRA format images to BGR format images.
    - ConvertMode.COLOR_RGBA2RGB: convert RGBA format images to RGB format images.
    - ConvertMode.COLOR_BGR2RGBA: convert BGR format images to RGBA format images.
    - ConvertMode.COLOR_RGB2BGRA: convert RGB format images to BGRA format images.
    - ConvertMode.COLOR_RGBA2BGR: convert RGBA format images to BGR format images.
    - ConvertMode.COLOR_BGRA2RGB: convert BGRA format images to RGB format images.
    - ConvertMode.COLOR_BGR2RGB: convert BGR format images to RGB format images.
    - ConvertMode.COLOR_RGB2BGR: convert RGB format images to BGR format images.
    - ConvertMode.COLOR_BGRA2RGBA: convert BGRA format images to RGBA format images.
    - ConvertMode.COLOR_RGBA2BGRA: convert RGBA format images to BGRA format images.
    - ConvertMode.COLOR_BGR2GRAY: convert BGR format images to GRAY format images.
    - ConvertMode.COLOR_RGB2GRAY: convert RGB format images to GRAY format images.
    - ConvertMode.COLOR_GRAY2BGR: convert GRAY format images to BGR format images.
    - ConvertMode.COLOR_GRAY2RGB: convert GRAY format images to RGB format images.
    - ConvertMode.COLOR_GRAY2BGRA: convert GRAY format images to BGRA format images.
    - ConvertMode.COLOR_GRAY2RGBA: convert GRAY format images to RGBA format images.
    - ConvertMode.COLOR_BGRA2GRAY: convert BGRA format images to GRAY format images.
    - ConvertMode.COLOR_RGBA2GRAY: convert RGBA format images to GRAY format images.
    """
    COLOR_BGR2BGRA = 0
    COLOR_RGB2RGBA = COLOR_BGR2BGRA
    COLOR_BGRA2BGR = 1
    COLOR_RGBA2RGB = COLOR_BGRA2BGR
    COLOR_BGR2RGBA = 2
    COLOR_RGB2BGRA = COLOR_BGR2RGBA
    COLOR_RGBA2BGR = 3
    COLOR_BGRA2RGB = COLOR_RGBA2BGR
    COLOR_BGR2RGB = 4
    COLOR_RGB2BGR = COLOR_BGR2RGB
    COLOR_BGRA2RGBA = 5
    COLOR_RGBA2BGRA = COLOR_BGRA2RGBA
    COLOR_BGR2GRAY = 6
    COLOR_RGB2GRAY = 7
    COLOR_GRAY2BGR = 8
    COLOR_GRAY2RGB = COLOR_GRAY2BGR
    COLOR_GRAY2BGRA = 9
    COLOR_GRAY2RGBA = COLOR_GRAY2BGRA
    COLOR_BGRA2GRAY = 10
    COLOR_RGBA2GRAY = 11

    @staticmethod
    def to_c_type(mode):
        """
        Function to return C type for color mode.
        """
        c_values = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
                    ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
                    ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
                    ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
                    ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
                    ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
                    ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
                    ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
                    ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
                    ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
                    ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
                    ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
                    ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
                    ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
                    ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
                    ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
                    ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
                    ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
                    ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
                    ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
                    }

        return c_values.get(mode)


class SliceMode(IntEnum):
    """
    Mode to Slice Tensor into multiple parts.

    Possible enumeration values are: SliceMode.PAD, SliceMode.DROP.

    - SliceMode.PAD: pad some pixels before slicing the Tensor if needed.
    - SliceMode.DROP: drop remainder pixels before slicing the Tensor if needed.
    """
    PAD = 0
    DROP = 1

    @staticmethod
    def to_c_type(mode):
        """
        Function to return C type for SliceMode.
        """
        c_values = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
                    SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP}

        return c_values.get(mode)


class AutoAugmentPolicy(str, Enum):
    """
    AutoAugment policy for different datasets.

@@ -331,22 +110,225 @@ class AutoAugmentPolicy(str, Enum):
        return c_values.get(policy)


def parse_padding(padding):
    """ Parses and prepares the padding tuple"""

    if isinstance(padding, numbers.Number):
        padding = [padding] * 4
    if len(padding) == 2:
        logger.warning("The behavior when `padding` is a sequence of length 2 will change from padding left/top "
                       "with the first value and right/bottom with the second, to padding left/right with the "
                       "first one and top/bottom with the second in the future. Or you can pass in a 4-element "
                       "sequence to specify left, top, right and bottom respectively.")
        left = top = padding[0]
        right = bottom = padding[1]
        padding = (left, top, right, bottom,)
    if isinstance(padding, list):
        padding = tuple(padding)
    return padding


class Border(str, Enum):
    """
    Padding Mode, Border Type.

    Possible enumeration values are: Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC.

    - Border.CONSTANT: means it fills the border with constant values.
    - Border.EDGE: means it pads with the last value on the edge.
    - Border.REFLECT: means it reflects the values on the edge omitting the last value of edge.
    - Border.SYMMETRIC: means it reflects the values on the edge repeating the last value of edge.

    Note: This class is derived from class str to support JSON serialization.
    """
    CONSTANT: str = "constant"
    EDGE: str = "edge"
    REFLECT: str = "reflect"
    SYMMETRIC: str = "symmetric"

    @staticmethod
    def to_python_type(border_type):
        """
        Function to return Python type for Border Type.
        """
        python_values = {Border.CONSTANT: 'constant',
                         Border.EDGE: 'edge',
                         Border.REFLECT: 'reflect',
                         Border.SYMMETRIC: 'symmetric'}
        return python_values.get(border_type)

    @staticmethod
    def to_c_type(border_type):
        """
        Function to return C type for Border Type.
        """
        c_values = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

        return c_values.get(border_type)
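A hedged usage sketch for `Border`: the unified `vision.Pad` exported above is assumed to accept a `padding_mode` keyword, and the padding amount is arbitrary:

>>> import mindspore.dataset.vision as vision
>>> from mindspore.dataset.vision import Border
>>> # Pad 4 pixels on every side, mirroring edge values without repeating the edge pixel.
>>> pad_op = vision.Pad(padding=4, padding_mode=Border.REFLECT)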


class ConvertMode(IntEnum):
    """
    The color conversion mode.

    Possible enumeration values are as follows:

    - ConvertMode.COLOR_BGR2BGRA: convert BGR format images to BGRA format images.
    - ConvertMode.COLOR_RGB2RGBA: convert RGB format images to RGBA format images.
    - ConvertMode.COLOR_BGRA2BGR: convert BGRA format images to BGR format images.
    - ConvertMode.COLOR_RGBA2RGB: convert RGBA format images to RGB format images.
    - ConvertMode.COLOR_BGR2RGBA: convert BGR format images to RGBA format images.
    - ConvertMode.COLOR_RGB2BGRA: convert RGB format images to BGRA format images.
    - ConvertMode.COLOR_RGBA2BGR: convert RGBA format images to BGR format images.
    - ConvertMode.COLOR_BGRA2RGB: convert BGRA format images to RGB format images.
    - ConvertMode.COLOR_BGR2RGB: convert BGR format images to RGB format images.
    - ConvertMode.COLOR_RGB2BGR: convert RGB format images to BGR format images.
    - ConvertMode.COLOR_BGRA2RGBA: convert BGRA format images to RGBA format images.
    - ConvertMode.COLOR_RGBA2BGRA: convert RGBA format images to BGRA format images.
    - ConvertMode.COLOR_BGR2GRAY: convert BGR format images to GRAY format images.
    - ConvertMode.COLOR_RGB2GRAY: convert RGB format images to GRAY format images.
    - ConvertMode.COLOR_GRAY2BGR: convert GRAY format images to BGR format images.
    - ConvertMode.COLOR_GRAY2RGB: convert GRAY format images to RGB format images.
    - ConvertMode.COLOR_GRAY2BGRA: convert GRAY format images to BGRA format images.
    - ConvertMode.COLOR_GRAY2RGBA: convert GRAY format images to RGBA format images.
    - ConvertMode.COLOR_BGRA2GRAY: convert BGRA format images to GRAY format images.
    - ConvertMode.COLOR_RGBA2GRAY: convert RGBA format images to GRAY format images.
    """
    COLOR_BGR2BGRA = 0
    COLOR_RGB2RGBA = COLOR_BGR2BGRA
    COLOR_BGRA2BGR = 1
    COLOR_RGBA2RGB = COLOR_BGRA2BGR
    COLOR_BGR2RGBA = 2
    COLOR_RGB2BGRA = COLOR_BGR2RGBA
    COLOR_RGBA2BGR = 3
    COLOR_BGRA2RGB = COLOR_RGBA2BGR
    COLOR_BGR2RGB = 4
    COLOR_RGB2BGR = COLOR_BGR2RGB
    COLOR_BGRA2RGBA = 5
    COLOR_RGBA2BGRA = COLOR_BGRA2RGBA
    COLOR_BGR2GRAY = 6
    COLOR_RGB2GRAY = 7
    COLOR_GRAY2BGR = 8
    COLOR_GRAY2RGB = COLOR_GRAY2BGR
    COLOR_GRAY2BGRA = 9
    COLOR_GRAY2RGBA = COLOR_GRAY2BGRA
    COLOR_BGRA2GRAY = 10
    COLOR_RGBA2GRAY = 11

    @staticmethod
    def to_c_type(mode):
        """
        Function to return C type for color mode.
        """
        c_values = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
                    ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
                    ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
                    ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
                    ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
                    ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
                    ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
                    ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
                    ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
                    ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
                    ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
                    ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
                    ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
                    ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
                    ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
                    ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
                    ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
                    ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
                    ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
                    ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
                    }

        return c_values.get(mode)
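A hedged sketch pairing `ConvertMode` with the `ConvertColor` transform exported above:

>>> import mindspore.dataset.vision as vision
>>> from mindspore.dataset.vision import ConvertMode
>>> # Reorder the channels of an OpenCV-style BGR image to RGB.
>>> convert_op = vision.ConvertColor(ConvertMode.COLOR_BGR2RGB)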


class ImageBatchFormat(IntEnum):
    """
    Data Format of images after batch operation.

    Possible enumeration values are: ImageBatchFormat.NHWC, ImageBatchFormat.NCHW.

    - ImageBatchFormat.NHWC: stores the data in the order of batch N, height H, width W, channels C.
    - ImageBatchFormat.NCHW: stores the data in the order of batch N, channels C, height H, width W.
    """
    NHWC = 0
    NCHW = 1

    @staticmethod
    def to_c_type(image_batch_format):
        """
        Function to return C type for ImageBatchFormat.
        """
        c_values = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                    ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

        return c_values.get(image_batch_format)
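A hedged sketch with the `CutMixBatch` transform exported above, which needs to know the batch layout; the `alpha` and `prob` values are illustrative:

>>> import mindspore.dataset.vision as vision
>>> from mindspore.dataset.vision import ImageBatchFormat
>>> # Apply CutMix to NHWC batches with mixing strength 1.0 on half of the batches.
>>> cutmix_op = vision.CutMixBatch(ImageBatchFormat.NHWC, alpha=1.0, prob=0.5)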


class Inter(IntEnum):
    """
    Interpolation Modes.

    Possible enumeration values are: Inter.NEAREST, Inter.ANTIALIAS, Inter.LINEAR, Inter.BILINEAR, Inter.CUBIC,
    Inter.BICUBIC, Inter.AREA, Inter.PILCUBIC.

    - Inter.NEAREST: means the interpolation method is nearest-neighbor interpolation.
    - Inter.ANTIALIAS: means the interpolation method is antialias interpolation.
    - Inter.LINEAR: means the interpolation method is bilinear interpolation, an alias of Inter.BILINEAR.
    - Inter.BILINEAR: means the interpolation method is bilinear interpolation.
    - Inter.CUBIC: means the interpolation method is bicubic interpolation, an alias of Inter.BICUBIC.
    - Inter.BICUBIC: means the interpolation method is bicubic interpolation.
    - Inter.AREA: means the interpolation method is pixel area interpolation.
    - Inter.PILCUBIC: means the interpolation method is bicubic interpolation as implemented in Pillow; the input
      should be in 3-channel format.
    """
    NEAREST = 0
    ANTIALIAS = 1
    BILINEAR = LINEAR = 2
    BICUBIC = CUBIC = 3
    AREA = 4
    PILCUBIC = 5

    @staticmethod
    def to_python_type(inter_type):
        """
        Function to return Python type for Interpolation Mode.
        """
        # Compare versions numerically: a plain string comparison would put "10.1.0" before "9.1.0".
        if tuple(int(v) for v in Image.__version__.split(".")[:2]) >= (9, 1):
            python_values = {Inter.NEAREST: Image.Resampling.NEAREST,
                             Inter.ANTIALIAS: Image.Resampling.LANCZOS,
                             Inter.LINEAR: Image.Resampling.BILINEAR,
                             Inter.CUBIC: Image.Resampling.BICUBIC}
        else:
            python_values = {Inter.NEAREST: Image.NEAREST,
                             Inter.ANTIALIAS: Image.ANTIALIAS,
                             Inter.LINEAR: Image.LINEAR,
                             Inter.CUBIC: Image.CUBIC}
        return python_values.get(inter_type)

    @staticmethod
    def to_c_type(inter_type):
        """
        Function to return C type for Interpolation Mode.
        """
        c_values = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                    Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                    Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
                    Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
                    Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC}

        return c_values.get(inter_type)
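In user code the enum is passed directly, e.g. to the `Resize` transform exported above; `to_python_type`/`to_c_type` are only used internally to pick the backend constant. The target size is illustrative:

>>> import mindspore.dataset.vision as vision
>>> from mindspore.dataset.vision import Inter
>>> # Resize to 224x224 with bilinear interpolation.
>>> resize_op = vision.Resize((224, 224), interpolation=Inter.BILINEAR)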


class SliceMode(IntEnum):
    """
    Mode to Slice Tensor into multiple parts.

    Possible enumeration values are: SliceMode.PAD, SliceMode.DROP.

    - SliceMode.PAD: pad some pixels before slicing the Tensor if needed.
    - SliceMode.DROP: drop remainder pixels before slicing the Tensor if needed.
    """
    PAD = 0
    DROP = 1

    @staticmethod
    def to_c_type(mode):
        """
        Function to return C type for SliceMode.
        """
        c_values = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
                    SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP}

        return c_values.get(mode)
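A hedged sketch with the `SlicePatches` transform exported above; the 2x2 grid is illustrative:

>>> import mindspore.dataset.vision as vision
>>> from mindspore.dataset.vision import SliceMode
>>> # Cut each image into a 2x2 grid; PAD fills border patches when the size does not divide evenly.
>>> slice_op = vision.SlicePatches(num_height=2, num_width=2, slice_mode=SliceMode.PAD)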


def get_image_num_channels(image):

@@ -397,3 +379,21 @@ def get_image_size(image):
        return size_list
    raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image, type(image)))


def parse_padding(padding):
    """ Parses and prepares the padding tuple"""

    if isinstance(padding, numbers.Number):
        padding = [padding] * 4
    if len(padding) == 2:
        logger.warning("The behavior when `padding` is a sequence of length 2 will change from padding left/top "
                       "with the first value and right/bottom with the second, to padding left/right with the "
                       "first one and top/bottom with the second in the future. Or you can pass in a 4-element "
                       "sequence to specify left, top, right and bottom respectively.")
        left = top = padding[0]
        right = bottom = padding[1]
        padding = (left, top, right, bottom,)
    if isinstance(padding, list):
        padding = tuple(padding)
    return padding
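Traced from the code above, the normalization behaves as follows:

>>> parse_padding(4)             # a scalar pads all four sides equally
(4, 4, 4, 4)
>>> parse_padding([1, 2])        # length 2: first value -> left/top, second -> right/bottom (warns)
(1, 1, 2, 2)
>>> parse_padding([1, 2, 3, 4])  # length 4: converted to a tuple as-is
(1, 2, 3, 4)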