From 6fd58dc580000118bd244974205c91be08c087fc Mon Sep 17 00:00:00 2001 From: alex-yuyue Date: Fri, 29 Jan 2021 09:21:06 -0500 Subject: [PATCH] Add call for decoupled image and text ops Signed-off-by: alex-yuyue --- .../ccsrc/minddata/dataset/api/execute.cc | 25 +- .../dataset/include/execute_binding.cc | 22 +- .../ccsrc/minddata/dataset/include/execute.h | 13 +- mindspore/dataset/text/transforms.py | 826 +++++----- mindspore/dataset/transforms/c_transforms.py | 12 +- mindspore/dataset/transforms/validators.py | 20 +- mindspore/dataset/vision/c_transforms.py | 1349 ++++++++--------- tests/ut/python/dataset/test_HWC2CHW.py | 17 +- tests/ut/python/dataset/test_compose.py | 10 +- tests/ut/python/dataset/test_invert.py | 18 +- .../dataset/test_random_crop_and_resize.py | 19 +- .../dataset/test_text_jieba_tokenizer.py | 21 +- .../ut/python/dataset/test_uniform_augment.py | 21 +- tests/ut/python/dataset/test_vocab.py | 14 +- 14 files changed, 1249 insertions(+), 1138 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/api/execute.cc b/mindspore/ccsrc/minddata/dataset/api/execute.cc index c1bbb9b9392..bfa05669565 100644 --- a/mindspore/ccsrc/minddata/dataset/api/execute.cc +++ b/mindspore/ccsrc/minddata/dataset/api/execute.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,11 @@ * limitations under the License. */ -#include "minddata/dataset/include/execute.h" +#include "minddata/dataset/core/tensor_row.h" #ifdef ENABLE_ANDROID #include "minddata/dataset/include/de_tensor.h" #endif +#include "minddata/dataset/include/execute.h" #include "minddata/dataset/include/tensor.h" #include "minddata/dataset/kernels/tensor_op.h" #ifndef ENABLE_ANDROID @@ -84,5 +85,25 @@ std::shared_ptr Execute::operator()(std::shared_ptr> &input_tensor_list, + std::vector> *output_tensor_list) { + CHECK_FAIL_RETURN_UNEXPECTED(op_ != nullptr, "Input TensorOperation is not valid"); + CHECK_FAIL_RETURN_UNEXPECTED(!input_tensor_list.empty(), "Input Tensor is not valid"); + + TensorRow input, output; + std::copy(input_tensor_list.begin(), input_tensor_list.end(), std::back_inserter(input)); + CHECK_FAIL_RETURN_UNEXPECTED(!input.empty(), "Input Tensor is not valid"); + + std::shared_ptr transform = op_->Build(); + Status rc = transform->Compute(input, &output); + if (rc.IsError()) { + // execution failed + RETURN_STATUS_UNEXPECTED("Operation execution failed : " + rc.ToString()); + } + + std::copy(output.begin(), output.end(), std::back_inserter(*output_tensor_list)); + return Status::OK(); +} + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc index cc0510951fd..56dc7d793e3 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/execute_binding.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,14 +28,26 @@ PYBIND_REGISTER(Execute, 0, ([](const py::module *m) { auto execute = std::make_shared(toTensorOperation(operation)); return execute; })) - .def("__call__", [](Execute &self, std::shared_ptr in) { - std::shared_ptr out = self(in); - if (out == nullptr) { + .def("__call__", + [](Execute &self, std::shared_ptr in) { + std::shared_ptr out = self(in); + if (out == nullptr) { + THROW_IF_ERROR([]() { + RETURN_STATUS_UNEXPECTED( + "Failed to execute op in eager mode, please check ERROR log above."); + }()); + } + return out; + }) + .def("__call__", [](Execute &self, const std::vector> &input_tensor_list) { + std::vector> output_tensor_list; + THROW_IF_ERROR(self(input_tensor_list, &output_tensor_list)); + if (output_tensor_list.empty()) { THROW_IF_ERROR([]() { RETURN_STATUS_UNEXPECTED("Failed to execute op in eager mode, please check ERROR log above."); }()); } - return out; + return output_tensor_list; }); })); } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/include/execute.h b/mindspore/ccsrc/minddata/dataset/include/execute.h index 87772907e8a..d4320e43e3e 100644 --- a/mindspore/ccsrc/minddata/dataset/include/execute.h +++ b/mindspore/ccsrc/minddata/dataset/include/execute.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,16 +43,23 @@ class Execute { #ifdef ENABLE_ANDROID /// \brief callable function to execute the TensorOperation in eager mode - /// \param[inout] input - the tensor to be transformed + /// \param[in] input - the tensor to be transformed /// \return - the output tensor, nullptr if Compute fails std::shared_ptr operator()(std::shared_ptr input); #endif /// \brief callable function to execute the TensorOperation in eager mode - /// \param[inout] input - the tensor to be transformed + /// \param[in] input - the tensor to be transformed /// \return - the output tensor, nullptr if Compute fails std::shared_ptr operator()(std::shared_ptr input); + /// \brief callable function to execute the TensorOperation in eager mode + /// \param[in] input_tensor_list - the tensor to be transformed + /// \param[out] out - the result tensor after transform + /// \return - Status + Status operator()(const std::vector> &input_tensor_list, + std::vector> *out); + private: std::shared_ptr op_; }; diff --git a/mindspore/dataset/text/transforms.py b/mindspore/dataset/text/transforms.py index 358c9a7a29d..858a8f576aa 100644 --- a/mindspore/dataset/text/transforms.py +++ b/mindspore/dataset/text/transforms.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -59,112 +59,37 @@ from .validators import check_lookup, check_jieba_add_dict, \ check_to_number, check_bert_tokenizer, check_python_tokenizer, check_slidingwindow from ..core.datatypes import mstype_to_detype from ..core.validator_helpers import replace_none +from ..transforms.c_transforms import TensorOperation -class TextTensorOperation: - def parse(self): - raise NotImplementedError("TextTensorOperation has to implement parse method.") -class Lookup(TextTensorOperation): +class TextTensorOperation(TensorOperation): """ - Lookup operator that looks up a word to an id. 
- - Args: - vocab (Vocab): A vocabulary object. - unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV). - If unknown_token is OOV, a runtime error will be thrown (default=None). - data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32) - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # Load vocabulary from list - >>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) - >>> # Use Lookup operator to map tokens to ids - >>> lookup = text.Lookup(vocab) - >>> data1 = data1.map(operations=[lookup]) + Base class of Text Tensor Ops """ - - @check_lookup - def __init__(self, vocab, unknown_token=None, data_type=mstype.int32): - self.vocab = vocab - self.unknown_token = replace_none(unknown_token, '') - self.data_type = data_type + def __call__(self, input_tensor): + if not isinstance(input_tensor, list): + input_list = [input_tensor] + else: + input_list = input_tensor + tensor_list = [] + for tensor in input_list: + if not isinstance(tensor, str): + raise TypeError("Input should be string or list of strings, got {}.".format(type(tensor))) + tensor_list.append(cde.Tensor(np.asarray(tensor))) + callable_op = cde.Execute(self.parse()) + output_list = callable_op(tensor_list) + for i, element in enumerate(output_list): + arr = element.as_array() + if arr.dtype.char == 'S': + output_list[i] = to_str(arr) + else: + output_list[i] = arr + if not isinstance(input_tensor, list) and len(output_list) == 1: + output_list = output_list[0] + return output_list def parse(self): - return cde.LookupOperation(self.vocab, self.unknown_token, str(mstype_to_detype(self.data_type))) - - -class SlidingWindow(TextTensorOperation): - """ - TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis - is a slice of data starting at the corresponding position, with a specified width. - - Args: - width (int): The width of the window. It must be an integer and greater than zero. - axis (int, optional): The axis along which the sliding window is computed (default=0). - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # Data before - >>> # | col1 | - >>> # +-------------+ - >>> # | [1,2,3,4,5] | - >>> # +-------------+ - >>> data1 = data1.map(operations=text.SlidingWindow(3, 0)) - >>> # Data after - >>> # | col1 | - >>> # +-------------+ - >>> # | [[1,2,3], | - >>> # | [2,3,4], | - >>> # | [3,4,5]] | - >>> # +--------------+ - """ - - @check_slidingwindow - def __init__(self, width, axis=0): - self.width = width - self.axis = axis - - def parse(self): - return cde.SlidingWindowOperation(self.width, self.axis) - - -class Ngram(TextTensorOperation): - """ - TensorOp to generate n-gram from a 1-D string Tensor. - - Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works. - - Args: - n (list[int]): n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result - would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up - for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in - an empty string produced. - left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width - will be capped at n-1. left_pad=("_",2) would pad left side of the sequence with "__" (default=None). 
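Together with the list-based Execute overload added in execute.cc and its new pybind binding, the TextTensorOperation.__call__ above lets a text op run eagerly on plain Python strings outside a dataset pipeline: inputs are wrapped into cde.Tensor objects, pushed through cde.Execute(self.parse()), and converted back to NumPy (via to_str for string outputs). A minimal usage sketch of that call path, assuming Lookup behaves the same on eager input as it does inside map() (the vocabulary is the one from its docstring):

    import mindspore.dataset.text as text

    vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您'])
    lookup = text.Lookup(vocab)
    single_id = lookup('深')     # single string in -> NumPy array out
    ids = lookup(['深', '圳'])   # list of strings in -> list of NumPy arrays out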
- right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence. - pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--" - (default=None). - separator (str, optional): symbol used to join strings together. For example. if 2-gram is - ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"] - (default=None, which means whitespace is used). - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> data1 = data1.map(operations=text.Ngram(3, separator=" ")) - """ - - @check_ngram - def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "): - self.ngrams = n - self.left_pad = left_pad - self.right_pad = right_pad - self.separator = separator - - def parse(self): - return cde.NgramOperation(self.ngrams, self.left_pad, self.right_pad, self.separator) + raise NotImplementedError("TextTensorOperation has to implement parse() method.") DE_C_INTER_JIEBA_MODE = { @@ -174,6 +99,18 @@ DE_C_INTER_JIEBA_MODE = { } +DE_C_INTER_SENTENCEPIECE_LOADTYPE = { + SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE, + SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL +} + + +DE_C_INTER_SENTENCEPIECE_OUTTYPE = { + SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString, + SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT +} + + class JiebaTokenizer(TextTensorOperation): """ Tokenize Chinese string into words based on dictionary. @@ -335,6 +272,201 @@ class JiebaTokenizer(TextTensorOperation): " jieba mode file {} is not exist.".format(model_path)) +class Lookup(TextTensorOperation): + """ + Lookup operator that looks up a word to an id. + + Args: + vocab (Vocab): A vocabulary object. + unknown_token (str, optional): Word used for lookup if the word being looked up is out-of-vocabulary (OOV). + If unknown_token is OOV, a runtime error will be thrown (default=None). + data_type (mindspore.dtype, optional): mindspore.dtype that lookup maps string to (default=mstype.int32) + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # Load vocabulary from list + >>> vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您']) + >>> # Use Lookup operator to map tokens to ids + >>> lookup = text.Lookup(vocab) + >>> data1 = data1.map(operations=[lookup]) + """ + + @check_lookup + def __init__(self, vocab, unknown_token=None, data_type=mstype.int32): + self.vocab = vocab + self.unknown_token = replace_none(unknown_token, '') + self.data_type = data_type + + def parse(self): + return cde.LookupOperation(self.vocab, self.unknown_token, str(mstype_to_detype(self.data_type))) + + +class Ngram(TextTensorOperation): + """ + TensorOp to generate n-gram from a 1-D string Tensor. + + Refer to https://en.wikipedia.org/wiki/N-gram#Examples for an overview of what n-gram is and how it works. + + Args: + n (list[int]): n in n-gram, n >= 1. n is a list of positive integers. For example, if n=[4,3], then the result + would be a 4-gram followed by a 3-gram in the same tensor. If the number of words is not enough to make up + for a n-gram, an empty string will be returned. For example, 3 grams on ["mindspore","best"] will result in + an empty string produced. + left_pad (tuple, optional): ("pad_token", pad_width). Padding performed on left side of the sequence. pad_width + will be capped at n-1. 
left_pad=("_",2) would pad left side of the sequence with "__" (default=None). + right_pad (tuple, optional): ("pad_token", pad_width). Padding performed on right side of the sequence. + pad_width will be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--" + (default=None). + separator (str, optional): symbol used to join strings together. For example. if 2-gram is + ["mindspore", "amazing"] with separator="-", the result would be ["mindspore-amazing"] + (default=None, which means whitespace is used). + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> data1 = data1.map(operations=text.Ngram(3, separator=" ")) + """ + + @check_ngram + def __init__(self, n, left_pad=("", 0), right_pad=("", 0), separator=" "): + self.ngrams = n + self.left_pad = left_pad + self.right_pad = right_pad + self.separator = separator + + def parse(self): + return cde.NgramOperation(self.ngrams, self.left_pad, self.right_pad, self.separator) + + +class SentencePieceTokenizer(TextTensorOperation): + """ + Tokenize scalar token or 1-D tokens to tokens by sentencepiece. + + Args: + mode (Union[str, SentencePieceVocab]): If the input parameter is a file, then it is of type string. + If the input parameter is a SentencePieceVocab object, then it is of type SentencePieceVocab. + out_type (Union[str, int]): The type of output. + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, {}) + >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING) + >>> data1 = data1.map(operations=tokenizer) + """ + + def __init__(self, mode, out_type): + self.mode = mode + self.out_type = out_type + + def parse(self): + return cde.SentencePieceTokenizerOperation(self.mode, DE_C_INTER_SENTENCEPIECE_OUTTYPE[self.out_type]) + + +class SlidingWindow(TextTensorOperation): + """ + TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension axis + is a slice of data starting at the corresponding position, with a specified width. + + Args: + width (int): The width of the window. It must be an integer and greater than zero. + axis (int, optional): The axis along which the sliding window is computed (default=0). + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # Data before + >>> # | col1 | + >>> # +-------------+ + >>> # | [1,2,3,4,5] | + >>> # +-------------+ + >>> data1 = data1.map(operations=text.SlidingWindow(3, 0)) + >>> # Data after + >>> # | col1 | + >>> # +-------------+ + >>> # | [[1,2,3], | + >>> # | [2,3,4], | + >>> # | [3,4,5]] | + >>> # +--------------+ + """ + + @check_slidingwindow + def __init__(self, width, axis=0): + self.width = width + self.axis = axis + + def parse(self): + return cde.SlidingWindowOperation(self.width, self.axis) + + +class ToNumber(TextTensorOperation): + """ + Tensor operation to convert every element of a string tensor to a number. + + Strings are casted according to the rules specified in the following links: + https://en.cppreference.com/w/cpp/string/basic_string/stof, + https://en.cppreference.com/w/cpp/string/basic_string/stoul, + except that any strings which represent negative numbers cannot be cast to an + unsigned integer type. + + Args: + data_type (mindspore.dtype): mindspore.dtype to be casted to. Must be + a numeric type. + + Raises: + RuntimeError: If strings are invalid to cast, or are out of range after being casted. 
+ + Examples: + >>> import mindspore.dataset.text as text + >>> import mindspore.common.dtype as mstype + >>> + >>> to_number_op = text.ToNumber(mstype.int8) + >>> data1 = data1.map(operations=to_number_op) + """ + + @check_to_number + def __init__(self, data_type): + data_type = mstype_to_detype(data_type) + self.data_type = str(data_type) + + def parse(self): + return cde.ToNumberOperation(self.data_type) + + +class TruncateSequencePair(TextTensorOperation): + """ + Truncate a pair of rank-1 tensors such that the total length is less than max_length. + + This operation takes two input tensors and returns two output Tensors. + + Args: + max_length (int): Maximum length required. + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # Data before + >>> # | col1 | col2 | + >>> # +---------+---------| + >>> # | [1,2,3] | [4,5] | + >>> # +---------+---------+ + >>> data1 = data1.map(operations=text.TruncateSequencePair(4)) + >>> # Data after + >>> # | col1 | col2 | + >>> # +---------+---------+ + >>> # | [1,2] | [4,5] | + >>> # +---------+---------+ + """ + + @check_pair_truncate + def __init__(self, max_length): + self.max_length = max_length + + def parse(self): + return cde.TruncateSequencePairOperation(self.max_length) + + class UnicodeCharTokenizer(TextTensorOperation): """ Tokenize a scalar tensor of UTF-8 string to Unicode characters. @@ -405,131 +537,31 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp): self.unknown_token, self.with_offsets) -DE_C_INTER_SENTENCEPIECE_LOADTYPE = { - SPieceTokenizerLoadType.FILE: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KFILE, - SPieceTokenizerLoadType.MODEL: cde.SPieceTokenizerLoadType.DE_SPIECE_TOKENIZER_LOAD_KMODEL -} - -DE_C_INTER_SENTENCEPIECE_OUTTYPE = { - SPieceTokenizerOutType.STRING: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KString, - SPieceTokenizerOutType.INT: cde.SPieceTokenizerOutType.DE_SPIECE_TOKENIZER_OUTTYPE_KINT -} - - -class SentencePieceTokenizer(TextTensorOperation): +class PythonTokenizer: """ - Tokenize scalar token or 1-D tokens to tokens by sentencepiece. + Callable class to be used for user-defined string tokenizer. Args: - mode (Union[str, SentencePieceVocab]): If the input parameter is a file, then it is of type string. - If the input parameter is a SentencePieceVocab object, then it is of type SentencePieceVocab. - out_type (Union[str, int]): The type of output. + tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens. 
Examples: >>> import mindspore.dataset.text as text >>> - >>> vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, {}) - >>> tokenizer = text.SentencePieceTokenizer(vocab, out_type=SPieceTokenizerOutType.STRING) - >>> data1 = data1.map(operations=tokenizer) + >>> def my_tokenizer(line): + >>> return line.split() + >>> data1 = data1.map(operations=text.PythonTokenizer(my_tokenizer)) """ - def __init__(self, mode, out_type): - self.mode = mode - self.out_type = out_type - - def parse(self): - return cde.SentencePieceTokenizerOperation(self.mode, DE_C_INTER_SENTENCEPIECE_OUTTYPE[self.out_type]) + @check_python_tokenizer + def __init__(self, tokenizer): + self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)') + def __call__(self, in_array): + in_array = to_str(in_array) + tokens = self.tokenizer(in_array) + return tokens if platform.system().lower() != 'windows': - class WhitespaceTokenizer(TextTensorOperation): - """ - Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces, such as: ' ', '\\\\t', '\\\\r', '\\\\n'. - - Note: - WhitespaceTokenizer is not supported on Windows platform yet. - - Args: - with_offsets (bool, optional): If or not output offsets of tokens (default=False). - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.WhitespaceTokenizer() - >>> data1 = data1.map(operations=tokenizer_op) - >>> # If with_offsets=False, then output three columns {["token", dtype=str], - >>> # ["offsets_start", dtype=uint32], - >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.WhitespaceTokenizer(True) - >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], - >>> output_columns=["token", "offsets_start", "offsets_limit"], - >>> column_order=["token", "offsets_start", "offsets_limit"]) - """ - - @check_with_offsets - def __init__(self, with_offsets=False): - self.with_offsets = with_offsets - - def parse(self): - return cde.WhitespaceTokenizerOperation(self.with_offsets) - - - class UnicodeScriptTokenizer(TextTensorOperation): - """ - Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries. - - Note: - UnicodeScriptTokenizer is not supported on Windows platform yet. - - Args: - keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). - with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
- - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=False) - >>> data1 = data1.map(operations=tokenizer_op) - >>> # If with_offsets=False, then output three columns {["token", dtype=str], - >>> # ["offsets_start", dtype=uint32], - >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True) - >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], - >>> output_columns=["token", "offsets_start", "offsets_limit"], - >>> column_order=["token", "offsets_start", "offsets_limit"]) - """ - - @check_unicode_script_tokenizer - def __init__(self, keep_whitespace=False, with_offsets=False): - keep_whitespace = replace_none(keep_whitespace, False) - with_offsets = replace_none(with_offsets, False) - self.keep_whitespace = keep_whitespace - self.with_offsets = with_offsets - - def parse(self): - return cde.UnicodeScriptTokenizerOperation(self.keep_whitespace, self.with_offsets) - - - class CaseFold(TextTensorOperation): - """ - Apply case fold operation on UTF-8 string tensor. - - Note: - CaseFold is not supported on Windows platform yet. - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> case_op = text.CaseFold() - >>> data1 = data1.map(operations=case_op) - """ - - def parse(self): - return cde.CaseFoldOperation() - - DE_C_INTER_NORMALIZE_FORM = { NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE, NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC, @@ -539,118 +571,6 @@ if platform.system().lower() != 'windows': } - class NormalizeUTF8(TextTensorOperation): - """ - Apply normalize operation on UTF-8 string tensor. - - Note: - NormalizeUTF8 is not supported on Windows platform yet. - - Args: - normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE, - NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD, - NormalizeForm.NFKD](default=NormalizeForm.NFKC). - See http://unicode.org/reports/tr15/ for details. - - - NormalizeForm.NONE, do nothing for input string tensor. - - NormalizeForm.NFC, normalize with Normalization Form C. - - NormalizeForm.NFKC, normalize with Normalization Form KC. - - NormalizeForm.NFD, normalize with Normalization Form D. - - NormalizeForm.NFKD, normalize with Normalization Form KD. - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> normalize_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFC) - >>> data1 = data1.map(operations=normalize_op) - """ - - def __init__(self, normalize_form=NormalizeForm.NFKC): - if not isinstance(normalize_form, NormalizeForm): - raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") - - normalize_form = replace_none(normalize_form, NormalizeForm.NFKC) - self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form] - - def parse(self): - return cde.NormalizeUTF8Operation(self.normalize_form) - - - class RegexReplace(TextTensorOperation): - """ - Replace UTF-8 string tensor with 'replace' according to regular expression 'pattern'. - - See http://userguide.icu-project.org/strings/regexp for support regex pattern. - - Note: - RegexReplace is not supported on Windows platform yet. - - Args: - pattern (str): the regex expression patterns. - replace (str): the string to replace matched element. 
- replace_all (bool, optional): If False, only replace first matched element; - if True, replace all matched elements (default=True). - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> pattern = 'Canada' - >>> replace = 'China' - >>> replace_op = text.RegexReplace(pattern, replace) - >>> data1 = data1.map(operations=replace_op) - """ - - def __init__(self, pattern, replace, replace_all=True): - self.pattern = pattern - self.replace = replace - self.replace_all = replace_all - - def parse(self): - return cde.RegexReplaceOperation(self.pattern, self.replace, self.replace_all) - - - class RegexTokenizer(TextTensorOperation): - """ - Tokenize a scalar tensor of UTF-8 string by regex expression pattern. - - See http://userguide.icu-project.org/strings/regexp for support regex pattern. - - Note: - RegexTokenizer is not supported on Windows platform yet. - - Args: - delim_pattern (str): The pattern of regex delimiters. - The original string will be split by matched elements. - keep_delim_pattern (str, optional): The string matched by 'delim_pattern' can be kept as a token - if it can be matched by 'keep_delim_pattern'. The default value is an empty str ('') - which means that delimiters will not be kept as an output token (default=''). - with_offsets (bool, optional): If or not output offsets of tokens (default=False). - - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # If with_offsets=False, default output one column {["text", dtype=str]} - >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=False) - >>> data1 = data1.map(operations=tokenizer_op) - >>> # If with_offsets=False, then output three columns {["token", dtype=str], - >>> # ["offsets_start", dtype=uint32], - >>> # ["offsets_limit", dtype=uint32]} - >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) - >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], - >>> output_columns=["token", "offsets_start", "offsets_limit"], - >>> column_order=["token", "offsets_start", "offsets_limit"]) - """ - - @check_regex_tokenizer - def __init__(self, delim_pattern, keep_delim_pattern='', with_offsets=False): - self.delim_pattern = delim_pattern - self.keep_delim_pattern = keep_delim_pattern - self.with_offsets = with_offsets - - def parse(self): - return cde.RegexTokenizerOperation(self.delim_pattern, self.keep_delim_pattern, self.with_offsets) - - class BasicTokenizer(TextTensorOperation): """ Tokenize a scalar tensor of UTF-8 string by specific rules. @@ -776,93 +696,201 @@ if platform.system().lower() != 'windows': self.normalization_form, self.preserve_unused_token, self.with_offsets) -class TruncateSequencePair(TextTensorOperation): - """ - Truncate a pair of rank-1 tensors such that the total length is less than max_length. + class CaseFold(TextTensorOperation): + """ + Apply case fold operation on UTF-8 string tensor. - This operation takes two input tensors and returns two output Tensors. + Note: + CaseFold is not supported on Windows platform yet. - Args: - max_length (int): Maximum length required. 
+ Examples: + >>> import mindspore.dataset.text as text + >>> + >>> case_op = text.CaseFold() + >>> data1 = data1.map(operations=case_op) + """ - Examples: - >>> import mindspore.dataset.text as text - >>> - >>> # Data before - >>> # | col1 | col2 | - >>> # +---------+---------| - >>> # | [1,2,3] | [4,5] | - >>> # +---------+---------+ - >>> data1 = data1.map(operations=text.TruncateSequencePair(4)) - >>> # Data after - >>> # | col1 | col2 | - >>> # +---------+---------+ - >>> # | [1,2] | [4,5] | - >>> # +---------+---------+ - """ - - @check_pair_truncate - def __init__(self, max_length): - self.max_length = max_length - - def parse(self): - return cde.TruncateSequencePairOperation(self.max_length) + def parse(self): + return cde.CaseFoldOperation() -class ToNumber(TextTensorOperation): - """ - Tensor operation to convert every element of a string tensor to a number. + class NormalizeUTF8(TextTensorOperation): + """ + Apply normalize operation on UTF-8 string tensor. - Strings are casted according to the rules specified in the following links: - https://en.cppreference.com/w/cpp/string/basic_string/stof, - https://en.cppreference.com/w/cpp/string/basic_string/stoul, - except that any strings which represent negative numbers cannot be cast to an - unsigned integer type. + Note: + NormalizeUTF8 is not supported on Windows platform yet. - Args: - data_type (mindspore.dtype): mindspore.dtype to be casted to. Must be - a numeric type. + Args: + normalize_form (NormalizeForm, optional): Valid values can be any of [NormalizeForm.NONE, + NormalizeForm.NFC, NormalizeForm.NFKC, NormalizeForm.NFD, + NormalizeForm.NFKD](default=NormalizeForm.NFKC). + See http://unicode.org/reports/tr15/ for details. - Raises: - RuntimeError: If strings are invalid to cast, or are out of range after being casted. + - NormalizeForm.NONE, do nothing for input string tensor. + - NormalizeForm.NFC, normalize with Normalization Form C. + - NormalizeForm.NFKC, normalize with Normalization Form KC. + - NormalizeForm.NFD, normalize with Normalization Form D. + - NormalizeForm.NFKD, normalize with Normalization Form KD. - Examples: - >>> import mindspore.dataset.text as text - >>> import mindspore.common.dtype as mstype - >>> - >>> to_number_op = text.ToNumber(mstype.int8) - >>> data1 = data1.map(operations=to_number_op) - """ + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> normalize_op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFC) + >>> data1 = data1.map(operations=normalize_op) + """ - @check_to_number - def __init__(self, data_type): - data_type = mstype_to_detype(data_type) - self.data_type = str(data_type) + def __init__(self, normalize_form=NormalizeForm.NFKC): + if not isinstance(normalize_form, NormalizeForm): + raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.") - def parse(self): - return cde.ToNumberOperation(self.data_type) + normalize_form = replace_none(normalize_form, NormalizeForm.NFKC) + self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form] + + def parse(self): + return cde.NormalizeUTF8Operation(self.normalize_form) -class PythonTokenizer: - """ - Callable class to be used for user-defined string tokenizer. + class RegexReplace(TextTensorOperation): + """ + Replace UTF-8 string tensor with 'replace' according to regular expression 'pattern'. - Args: - tokenizer (Callable): Python function that takes a `str` and returns a list of `str` as tokens. + See http://userguide.icu-project.org/strings/regexp for support regex pattern. 
- Examples: - >>> import mindspore.dataset.text as text - >>> - >>> def my_tokenizer(line): - >>> return line.split() - >>> data1 = data1.map(operations=text.PythonTokenizer(my_tokenizer)) - """ + Note: + RegexReplace is not supported on Windows platform yet. - @check_python_tokenizer - def __init__(self, tokenizer): - self.tokenizer = np.vectorize(lambda x: np.array(tokenizer(x), dtype='U'), signature='()->(n)') + Args: + pattern (str): the regex expression patterns. + replace (str): the string to replace matched element. + replace_all (bool, optional): If False, only replace first matched element; + if True, replace all matched elements (default=True). - def __call__(self, in_array): - in_array = to_str(in_array) - tokens = self.tokenizer(in_array) - return tokens + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> pattern = 'Canada' + >>> replace = 'China' + >>> replace_op = text.RegexReplace(pattern, replace) + >>> data1 = data1.map(operations=replace_op) + """ + + def __init__(self, pattern, replace, replace_all=True): + self.pattern = pattern + self.replace = replace + self.replace_all = replace_all + + def parse(self): + return cde.RegexReplaceOperation(self.pattern, self.replace, self.replace_all) + + + class RegexTokenizer(TextTensorOperation): + """ + Tokenize a scalar tensor of UTF-8 string by regex expression pattern. + + See http://userguide.icu-project.org/strings/regexp for support regex pattern. + + Note: + RegexTokenizer is not supported on Windows platform yet. + + Args: + delim_pattern (str): The pattern of regex delimiters. + The original string will be split by matched elements. + keep_delim_pattern (str, optional): The string matched by 'delim_pattern' can be kept as a token + if it can be matched by 'keep_delim_pattern'. The default value is an empty str ('') + which means that delimiters will not be kept as an output token (default=''). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=False) + >>> data1 = data1.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) + >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], + >>> output_columns=["token", "offsets_start", "offsets_limit"], + >>> column_order=["token", "offsets_start", "offsets_limit"]) + """ + + @check_regex_tokenizer + def __init__(self, delim_pattern, keep_delim_pattern='', with_offsets=False): + self.delim_pattern = delim_pattern + self.keep_delim_pattern = keep_delim_pattern + self.with_offsets = with_offsets + + def parse(self): + return cde.RegexTokenizerOperation(self.delim_pattern, self.keep_delim_pattern, self.with_offsets) + + + class UnicodeScriptTokenizer(TextTensorOperation): + """ + Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries. + + Note: + UnicodeScriptTokenizer is not supported on Windows platform yet. + + Args: + keep_whitespace (bool, optional): If or not emit whitespace tokens (default=False). + with_offsets (bool, optional): If or not output offsets of tokens (default=False). 
+ + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=False) + >>> data1 = data1.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True) + >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], + >>> output_columns=["token", "offsets_start", "offsets_limit"], + >>> column_order=["token", "offsets_start", "offsets_limit"]) + """ + + @check_unicode_script_tokenizer + def __init__(self, keep_whitespace=False, with_offsets=False): + keep_whitespace = replace_none(keep_whitespace, False) + with_offsets = replace_none(with_offsets, False) + self.keep_whitespace = keep_whitespace + self.with_offsets = with_offsets + + def parse(self): + return cde.UnicodeScriptTokenizerOperation(self.keep_whitespace, self.with_offsets) + + + class WhitespaceTokenizer(TextTensorOperation): + """ + Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces, such as: ' ', '\\\\t', '\\\\r', '\\\\n'. + + Note: + WhitespaceTokenizer is not supported on Windows platform yet. + + Args: + with_offsets (bool, optional): If or not output offsets of tokens (default=False). + + Examples: + >>> import mindspore.dataset.text as text + >>> + >>> # If with_offsets=False, default output one column {["text", dtype=str]} + >>> tokenizer_op = text.WhitespaceTokenizer() + >>> data1 = data1.map(operations=tokenizer_op) + >>> # If with_offsets=False, then output three columns {["token", dtype=str], + >>> # ["offsets_start", dtype=uint32], + >>> # ["offsets_limit", dtype=uint32]} + >>> tokenizer_op = text.WhitespaceTokenizer(True) + >>> data2 = data2.map(operations=tokenizer_op, input_columns=["text"], + >>> output_columns=["token", "offsets_start", "offsets_limit"], + >>> column_order=["token", "offsets_start", "offsets_limit"]) + """ + + @check_with_offsets + def __init__(self, with_offsets=False): + self.with_offsets = with_offsets + + def parse(self): + return cde.WhitespaceTokenizerOperation(self.with_offsets) diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 2a79f2d1827..ae4b8e9a941 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,6 +26,14 @@ from .validators import check_num_classes, check_de_type, check_fill_value, chec from ..core.datatypes import mstype_to_detype +class TensorOperation: + def __call__(self): + raise NotImplementedError("TensorOperation has to implement __call__() method.") + + def parse(self): + raise NotImplementedError("TensorOperation has to implement parse() method.") + + class OneHot(cde.OneHotOp): """ Tensor operation to apply one hot encoding. @@ -304,7 +312,7 @@ class Unique(cde.UniqueOp): Also return an index tensor that contains the index of each element of the input tensor in the Unique output tensor. 
- Finally, return a count tensor that constains the count of each element of + Finally, return a count tensor that contains the count of each element of the output tensor in the input tensor. Note: diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index 60723972201..f3ae5cea8a5 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -237,8 +237,8 @@ def check_compose_list(method): type_check(transforms, (list,), transforms) if not transforms: raise ValueError("transforms list is empty.") - for i, transfrom in enumerate(transforms): - if not callable(transfrom): + for i, transform in enumerate(transforms): + if not callable(transform): raise ValueError("transforms[{}] is not callable.".format(i)) return method(self, *args, **kwargs) @@ -269,9 +269,10 @@ def check_random_apply(method): [transforms, prob], _ = parse_user_args(method, *args, **kwargs) type_check(transforms, (list,), "transforms") - for i, transfrom in enumerate(transforms): - if not callable(transfrom): - raise ValueError("transforms[{}] is not callable.".format(i)) + for i, transform in enumerate(transforms): + if str(transform).find("c_transform") >= 0: + raise ValueError("transforms[{}] is not a py transforms. Should not use a c transform in py transform" \ + .format(i)) if prob is not None: type_check(prob, (float, int,), "prob") @@ -290,9 +291,10 @@ def check_transforms_list(method): [transforms], _ = parse_user_args(method, *args, **kwargs) type_check(transforms, (list,), "transforms") - for i, transfrom in enumerate(transforms): - if not callable(transfrom): - raise ValueError("transforms[{}] is not callable.".format(i)) + for i, transform in enumerate(transforms): + if str(transform).find("c_transform") >= 0: + raise ValueError("transforms[{}] is not a py transforms. Should not use a c transform in py transform" \ + .format(i)) return method(self, *args, **kwargs) return new_method diff --git a/mindspore/dataset/vision/c_transforms.py b/mindspore/dataset/vision/c_transforms.py index c83eaf618ac..5496dd5c5d3 100644 --- a/mindspore/dataset/vision/c_transforms.py +++ b/mindspore/dataset/vision/c_transforms.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -55,21 +55,55 @@ from .validators import check_prob, check_crop, check_resize_interpolation, chec check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \ check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \ check_cut_mix_batch_c, check_posterize +from ..transforms.c_transforms import TensorOperation + + +class ImageTensorOperation(TensorOperation): + """ + Base class of Image Tensor Ops + """ + def __call__(self, input_tensor): + if not isinstance(input_tensor, list): + input_list = [input_tensor] + else: + input_list = input_tensor + tensor_list = [] + for tensor in input_list: + if not isinstance(tensor, (np.ndarray, Image.Image)): + raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(tensor))) + tensor_list.append(cde.Tensor(np.asarray(tensor))) + callable_op = cde.Execute(self.parse()) + output_list = callable_op(tensor_list) + for i, element in enumerate(output_list): + arr = element.as_array() + if arr.dtype.char == 'S': + output_list[i] = np.char.decode(arr) + else: + output_list[i] = arr + if not isinstance(input_tensor, list) and len(output_list) == 1: + output_list = output_list[0] + return output_list + + def parse(self): + raise NotImplementedError("ImageTensorOperation has to implement parse() method.") -DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, - Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, - Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC, - Inter.AREA: cde.InterpolationMode.DE_INTER_AREA} DE_C_BORDER_TYPE = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, Border.EDGE: cde.BorderType.DE_BORDER_EDGE, Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT, Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC} + DE_C_IMAGE_BATCH_FORMAT = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC, ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW} +DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, + Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, + Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC, + Inter.AREA: cde.InterpolationMode.DE_INTER_AREA} + + def parse_padding(padding): if isinstance(padding, numbers.Number): padding = [padding] * 4 @@ -81,9 +115,6 @@ def parse_padding(padding): padding = tuple(padding) return padding -class ImageTensorOperation: - def parse(self): - raise NotImplementedError("ImageTensorOperation has to implement parse method.") class AutoContrast(ImageTensorOperation): """ @@ -112,95 +143,67 @@ class AutoContrast(ImageTensorOperation): return cde.AutoContrastOperation(self.cutoff, self.ignore) -class RandomSharpness(ImageTensorOperation): +class BoundingBoxAugment(ImageTensorOperation): """ - Adjust the sharpness of the input image by a fixed or random degree. Degree of 0.0 gives a blurred image, - degree of 1.0 gives the original image, and degree of 2.0 gives a sharpened image. + Apply a given image transform on a random selection of bounding box regions of a given image. Args: - degrees (tuple, optional): Range of random sharpness adjustment degrees. It should be in (min, max) format. - If min=max, then it is a single fixed magnitude operation (default = (0.1, 1.9)). - - Raises: - TypeError : If degrees is not a list or tuple. - ValueError: If degrees is negative. - ValueError: If degrees is in (max, min) format instead of (min, max). 
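The ImageTensorOperation.__call__ above gives the vision ops the same eager entry point: NumPy arrays or PIL images are wrapped into cde.Tensor objects, run through cde.Execute(self.parse()), and returned as NumPy arrays (a single input yields a single array, a list yields a list). A minimal sketch, assuming an HWC uint8 image is acceptable eager input for these ops (the random image below is only a stand-in):

    import numpy as np
    import mindspore.dataset.vision.c_transforms as c_vision

    img = np.random.randint(0, 255, size=(64, 64, 3), dtype=np.uint8)  # stand-in HWC image
    inverted = c_vision.Invert()(img)       # single image in -> NumPy array out
    pair = c_vision.Invert()([img, img])    # list in -> list of NumPy arrays out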
+ transform: C++ transformation function to be applied on random selection + of bounding box regions of a given image. + ratio (float, optional): Ratio of bounding boxes to apply augmentation on. + Range: [0, 1] (default=0.3). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) + >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes + >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) + >>> # map to apply ops + >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op], + ... input_columns=["image", "bbox"], + ... output_columns=["image", "bbox"], + ... column_order=["image", "bbox"]) """ - @check_positive_degrees - def __init__(self, degrees=(0.1, 1.9)): - self.degrees = degrees + @check_bounding_box_augment_cpp + def __init__(self, transform, ratio=0.3): + self.ratio = ratio + self.transform = transform def parse(self): - return cde.RandomSharpnessOperation(self.degrees) + if self.transform and getattr(self.transform, 'parse', None): + transform = self.transform.parse() + else: + transform = self.transform + return cde.BoundingBoxAugmentOperation(transform, self.ratio) -class Equalize(ImageTensorOperation): +class CenterCrop(ImageTensorOperation): """ - Apply histogram equalization on input image. - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - def parse(self): - return cde.EqualizeOperation() - - -class Invert(ImageTensorOperation): - """ - Apply invert on input image in RGB mode. - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.Invert()] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - def parse(self): - return cde.InvertOperation() - - -class Decode(ImageTensorOperation): - """ - Decode the input image in RGB mode. + Crops the input image at the center to the given size. Args: - rgb (bool, optional): Mode of decoding input image (default=True). - If True means format of decoded image is RGB else BGR(deprecated). + size (Union[int, sequence]): The output size of the cropped image. + If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip()] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + >>> # crop image to a square + >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, ... input_columns=["image"]) + >>> # crop image to portrait style + >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))] + >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, + ... input_columns=["image"]) """ - def __init__(self, rgb=True): - self.rgb = rgb - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy): Image to be decoded. - - Returns: - img (NumPy), Decoded image. 
- """ - if not isinstance(img, np.ndarray) or img.ndim != 1 or img.dtype.type is np.str_: - raise TypeError("Input should be an encoded image with 1-D NumPy type, got {}.".format(type(img))) - decode = cde.Execute(cde.DecodeOperation(self.rgb)) - img = decode(cde.Tensor(np.asarray(img))) - return img.as_array() + @check_crop + def __init__(self, size): + if isinstance(size, int): + size = (size, size) + self.size = size def parse(self): - return cde.DecodeOperation(self.rgb) + return cde.CenterCropOperation(self.size) class CutMixBatch(ImageTensorOperation): @@ -258,6 +261,86 @@ class CutOut(ImageTensorOperation): return cde.CutOutOperation(self.length, self.num_patches) +class Decode(ImageTensorOperation): + """ + Decode the input image in RGB mode. + + Args: + rgb (bool, optional): Mode of decoding input image (default=True). + If True means format of decoded image is RGB else BGR(deprecated). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomHorizontalFlip()] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + def __init__(self, rgb=True): + self.rgb = rgb + + def __call__(self, img): + """ + Call method. + + Args: + img (NumPy): Image to be decoded. + + Returns: + img (NumPy), Decoded image. + """ + if not isinstance(img, np.ndarray) or img.ndim != 1 or img.dtype.type is np.str_: + raise TypeError("Input should be an encoded image with 1-D NumPy type, got {}.".format(type(img))) + decode = cde.Execute(cde.DecodeOperation(self.rgb)) + img = decode(cde.Tensor(np.asarray(img))) + return img.as_array() + + def parse(self): + return cde.DecodeOperation(self.rgb) + + +class Equalize(ImageTensorOperation): + """ + Apply histogram equalization on input image. + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.Equalize()] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + def parse(self): + return cde.EqualizeOperation() + + +class HWC2CHW(ImageTensorOperation): + """ + Transpose the input image; shape (H, W, C) to shape (C, H, W). + + Examples: + >>> transforms_list = [c_vision.Decode(), + ... c_vision.RandomHorizontalFlip(0.75), + ... c_vision.RandomCrop(512), + ... c_vision.HWC2CHW()] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + def parse(self): + return cde.HwcToChwOperation() + + +class Invert(ImageTensorOperation): + """ + Apply invert on input image in RGB mode. + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.Invert()] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + def parse(self): + return cde.InvertOperation() + + class MixUpBatch(ImageTensorOperation): """ Apply MixUp transformation on input batch of images and labels. Each image is multiplied by a random weight (lambda) @@ -313,22 +396,6 @@ class Normalize(ImageTensorOperation): self.mean = mean self.std = std - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image array to be normalized. - - Returns: - img (NumPy), Normalized Image array. 
- """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - normalize = cde.Execute(cde.NormalizeOperation(self.mean, self.std)) - img = normalize(cde.Tensor(np.asarray(img))) - return img.as_array() - def parse(self): return cde.NormalizeOperation(self.mean, self.std) @@ -360,26 +427,59 @@ class NormalizePad(ImageTensorOperation): self.std = std self.dtype = dtype - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image array to be normalizepad. - - Returns: - img (NumPy), NormalizePaded Image array. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - normalize_pad = cde.Execute(cde.NormalizePadOperation(self.mean, self.std, self.dtype)) - img = normalize_pad(cde.Tensor(np.asarray(img))) - return img.as_array() - def parse(self): return cde.NormalizePadOperation(self.mean, self.std, self.dtype) +class Pad(ImageTensorOperation): + """ + Pads the image according to padding parameters. + + Args: + padding (Union[int, sequence]): The number of pixels to pad the image. + If a single number is provided, it pads all borders with this value. + If a tuple or list of 2 values are provided, it pads the (left and top) + with the first value and (right and bottom) with the second value. + If 4 values are provided as a list or tuple, + it pads the left, top, right and bottom respectively. + fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for + padding_mode Border.CONSTANT (default=0). + If it is an integer, it is used for all RGB channels. + If it is a 3-tuple, it is used to fill R, G, B channels respectively. + The fill_value values must be in range [0, 255]. + padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of + [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. + + - Border.CONSTANT, means it fills the border with constant values. + + - Border.EDGE, means it pads with the last value on the edge. + + - Border.REFLECT, means it reflects the values on the edge omitting the last + value of edge. + + - Border.SYMMETRIC, means it reflects the values on the edge repeating the last + value of edge. + + Examples: + >>> from mindspore.dataset.vision import Border + >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_pad + def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): + padding = parse_padding(padding) + if isinstance(fill_value, int): + fill_value = tuple([fill_value] * 3) + self.padding = padding + self.fill_value = fill_value + self.padding_mode = padding_mode + + def parse(self): + return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode]) + + class RandomAffine(ImageTensorOperation): """ Apply Random affine transformation to the input image. @@ -486,6 +586,82 @@ class RandomAffine(ImageTensorOperation): self.fill_value) +class RandomColor(ImageTensorOperation): + """ + Adjust the color of the input image by a fixed or random degree. + This operation works only with 3-channel color images. + + Args: + degrees (sequence, optional): Range of random color adjustment degrees. + It should be in (min, max) format. 
If min=max, then it is a + single fixed magnitude operation (default=(0.1, 1.9)). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomColor((0.5, 2.0))] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_positive_degrees + def __init__(self, degrees=(0.1, 1.9)): + self.degrees = degrees + + def parse(self): + return cde.RandomColorOperation(*self.degrees) + + +class RandomColorAdjust(ImageTensorOperation): + """ + Randomly adjust the brightness, contrast, saturation, and hue of the input image. + + Args: + brightness (Union[float, tuple], optional): Brightness adjustment factor (default=(1, 1)). Cannot be negative. + If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness]. + If it is a sequence, it should be [min, max] for the range. + contrast (Union[float, tuple], optional): Contrast adjustment factor (default=(1, 1)). Cannot be negative. + If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast]. + If it is a sequence, it should be [min, max] for the range. + saturation (Union[float, tuple], optional): Saturation adjustment factor (default=(1, 1)). Cannot be negative. + If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation]. + If it is a sequence, it should be [min, max] for the range. + hue (Union[float, tuple], optional): Hue adjustment factor (default=(0, 0)). + If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5. + If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5. + + Examples: + >>> decode_op = c_vision.Decode() + >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1), + ... contrast=(0.4, 1), + ... saturation=(0.3, 1)) + >>> transforms_list = [decode_op, transform_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_random_color_adjust + def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): + brightness = self.expand_values(brightness) + contrast = self.expand_values(contrast) + saturation = self.expand_values(saturation) + hue = self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False) + + self.brightness = brightness + self.contrast = contrast + self.saturation = saturation + self.hue = hue + + def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): + if isinstance(value, numbers.Number): + value = [center - value, center + value] + if non_negative: + value[0] = max(0, value[0]) + check_range(value, bound) + return (value[0], value[1]) + + def parse(self): + return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue) + + class RandomCrop(ImageTensorOperation): """ Crop the input image at a random location. @@ -551,6 +727,58 @@ class RandomCrop(ImageTensorOperation): return cde.RandomCropOperation(self.size, self.padding, self.pad_if_needed, self.fill_value, border_type) +class RandomCropDecodeResize(ImageTensorOperation): + """ + Equivalent to RandomResizedCrop, but crops before decodes. + + Args: + size (Union[int, sequence]): The size of the output image. + If size is an integer, a square crop of size (size, size) is returned. + If size is a sequence of length 2, it should be (height, width). 
+ scale (tuple, optional): Range [min, max) of respective size of the + original size to be cropped (default=(0.08, 1.0)). + ratio (tuple, optional): Range [min, max) of aspect ratio to be + cropped (default=(3. / 4., 4. / 3.)). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). + It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.BILINEAR, means interpolation method is bilinear interpolation. + + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means interpolation method is bicubic interpolation. + + max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10). + If exceeded, fall back to use center_crop instead. + + Examples: + >>> from mindspore.dataset.vision import Inter + >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75), + ... scale=(0.25, 0.5), + ... interpolation=Inter.NEAREST, + ... max_attempts=5) + >>> transforms_list = [resize_crop_decode_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_random_resize_crop + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) + self.size = size + self.scale = scale + self.ratio = ratio + self.interpolation = interpolation + self.max_attempts = max_attempts + + def parse(self): + return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio, + DE_C_INTER_MODE[self.interpolation], + self.max_attempts) + + class RandomCropWithBBox(ImageTensorOperation): """ Crop the input image at a random location and adjust bounding boxes accordingly. @@ -685,225 +913,6 @@ class RandomPosterize(ImageTensorOperation): return cde.RandomPosterizeOperation(bits) -class RandomVerticalFlip(ImageTensorOperation): - """ - Flip the input image vertically, randomly with a given probability. - - Args: - prob (float, optional): Probability of the image being flipped (default=0.5). - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_prob - def __init__(self, prob=0.5): - self.prob = prob - - def parse(self): - return cde.RandomVerticalFlipOperation(self.prob) - - -class RandomVerticalFlipWithBBox(ImageTensorOperation): - """ - Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly. - - Args: - prob (float, optional): Probability of the image being flipped (default=0.5). - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlipWithBBox(0.20)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_prob - def __init__(self, prob=0.5): - self.prob = prob - - def parse(self): - return cde.RandomVerticalFlipWithBBoxOperation(self.prob) - - -class BoundingBoxAugment(ImageTensorOperation): - """ - Apply a given image transform on a random selection of bounding box regions of a given image. - - Args: - transform: C++ transformation function to be applied on random selection - of bounding box regions of a given image. - ratio (float, optional): Ratio of bounding boxes to apply augmentation on. - Range: [0, 1] (default=0.3). 
- - Examples: - >>> # set bounding box operation with ratio of 1 to apply rotation on all bounding boxes - >>> bbox_aug_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) - >>> # map to apply ops - >>> image_folder_dataset = image_folder_dataset.map(operations=[bbox_aug_op], - ... input_columns=["image", "bbox"], - ... output_columns=["image", "bbox"], - ... column_order=["image", "bbox"]) - """ - - @check_bounding_box_augment_cpp - def __init__(self, transform, ratio=0.3): - self.ratio = ratio - self.transform = transform - - def parse(self): - if self.transform and getattr(self.transform, 'parse', None): - transform = self.transform.parse() - else: - transform = self.transform - return cde.BoundingBoxAugmentOperation(transform, self.ratio) - - -class Resize(ImageTensorOperation): - """ - Resize the input image to the given size. - - Args: - size (Union[int, sequence]): The output size of the resized image. - If size is an integer, the smaller edge of the image will be resized to this value with - the same image aspect ratio. - If size is a sequence of length 2, it should be (height, width). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). - It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.LINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means interpolation method is bicubic interpolation. - - - Inter.AREA, means interpolation method is pixel area interpolation. - - Examples: - >>> from mindspore.dataset.vision import Inter - >>> decode_op = c_vision.Decode() - >>> resize_op = c_vision.Resize([100, 75], Inter.BICUBIC) - >>> transforms_list = [decode_op, resize_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_resize_interpolation - def __init__(self, size, interpolation=Inter.LINEAR): - if isinstance(size, int): - size = (size,) - self.size = size - self.interpolation = interpolation - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image to be resized. - - Returns: - img (NumPy), Resized image. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - resize = cde.Execute(cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation])) - img = resize(cde.Tensor(np.asarray(img))) - return img.as_array() - - def parse(self): - return cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation]) - - -class ResizeWithBBox(ImageTensorOperation): - """ - Resize the input image to the given size and adjust bounding boxes accordingly. - - Args: - size (Union[int, sequence]): The output size of the resized image. - If size is an integer, smaller edge of the image will be resized to this value with - the same image aspect ratio. - If size is a sequence of length 2, it should be (height, width). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). - It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.LINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means interpolation method is bicubic interpolation. 
- - Examples: - >>> from mindspore.dataset.vision import Inter - >>> decode_op = c_vision.Decode() - >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST) - >>> transforms_list = [decode_op, bbox_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_resize_interpolation - def __init__(self, size, interpolation=Inter.LINEAR): - self.size = size - self.interpolation = interpolation - - def parse(self): - size = self.size - if isinstance(size, int): - size = (size,) - return cde.ResizeWithBBoxOperation(size, DE_C_INTER_MODE[self.interpolation]) - - -class RandomResizedCropWithBBox(ImageTensorOperation): - """ - Crop the input image to a random size and aspect ratio and adjust bounding boxes accordingly. - - Args: - size (Union[int, sequence]): The size of the output image. - If size is an integer, a square crop of size (size, size) is returned. - If size is a sequence of length 2, it should be (height, width). - scale (tuple, optional): Range (min, max) of respective size of the original - size to be cropped (default=(0.08, 1.0)). - ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped - (default=(3. / 4., 4. / 3.)). - interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). - It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. - - - Inter.BILINEAR, means interpolation method is bilinear interpolation. - - - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - - Inter.BICUBIC, means interpolation method is bicubic interpolation. - - max_attempts (int, optional): The maximum number of attempts to propose a valid - crop area (default=10). If exceeded, fall back to use center crop instead. - - Examples: - >>> from mindspore.dataset.vision import Inter - >>> decode_op = c_vision.Decode() - >>> bbox_op = c_vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST) - >>> transforms_list = [decode_op, bbox_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_random_resize_crop - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation=Inter.BILINEAR, max_attempts=10): - if isinstance(size, int): - size = (size, size) - self.size = size - self.scale = scale - self.ratio = ratio - self.interpolation = interpolation - self.max_attempts = max_attempts - - def parse(self): - return cde.RandomResizedCropWithBBoxOperation(self.size, self.scale, self.ratio, - DE_C_INTER_MODE[self.interpolation], self.max_attempts) - - class RandomResizedCrop(ImageTensorOperation): """ Crop the input image to a random size and aspect ratio. @@ -954,205 +963,53 @@ class RandomResizedCrop(ImageTensorOperation): self.max_attempts) -class CenterCrop(ImageTensorOperation): +class RandomResizedCropWithBBox(ImageTensorOperation): """ - Crops the input image at the center to the given size. + Crop the input image to a random size and aspect ratio and adjust bounding boxes accordingly. Args: - size (Union[int, sequence]): The output size of the cropped image. + size (Union[int, sequence]): The size of the output image. If size is an integer, a square crop of size (size, size) is returned. If size is a sequence of length 2, it should be (height, width). 
- - Examples: - >>> # crop image to a square - >>> transforms_list1 = [c_vision.Decode(), c_vision.CenterCrop(50)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, - ... input_columns=["image"]) - >>> # crop image to portrait style - >>> transforms_list2 = [c_vision.Decode(), c_vision.CenterCrop((60, 40))] - >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, - ... input_columns=["image"]) - """ - - @check_crop - def __init__(self, size): - if isinstance(size, int): - size = (size, size) - self.size = size - - def parse(self): - return cde.CenterCropOperation(self.size) - - -class RandomColor(ImageTensorOperation): - """ - Adjust the color of the input image by a fixed or random degree. - This operation works only with 3-channel color images. - - Args: - degrees (sequence, optional): Range of random color adjustment degrees. - It should be in (min, max) format. If min=max, then it is a - single fixed magnitude operation (default=(0.1, 1.9)). - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomColor((0.5, 2.0))] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_positive_degrees - def __init__(self, degrees=(0.1, 1.9)): - self.degrees = degrees - - def parse(self): - return cde.RandomColorOperation(*self.degrees) - - -class RandomColorAdjust(ImageTensorOperation): - """ - Randomly adjust the brightness, contrast, saturation, and hue of the input image. - - Args: - brightness (Union[float, tuple], optional): Brightness adjustment factor (default=(1, 1)). Cannot be negative. - If it is a float, the factor is uniformly chosen from the range [max(0, 1-brightness), 1+brightness]. - If it is a sequence, it should be [min, max] for the range. - contrast (Union[float, tuple], optional): Contrast adjustment factor (default=(1, 1)). Cannot be negative. - If it is a float, the factor is uniformly chosen from the range [max(0, 1-contrast), 1+contrast]. - If it is a sequence, it should be [min, max] for the range. - saturation (Union[float, tuple], optional): Saturation adjustment factor (default=(1, 1)). Cannot be negative. - If it is a float, the factor is uniformly chosen from the range [max(0, 1-saturation), 1+saturation]. - If it is a sequence, it should be [min, max] for the range. - hue (Union[float, tuple], optional): Hue adjustment factor (default=(0, 0)). - If it is a float, the range will be [-hue, hue]. Value should be 0 <= hue <= 0.5. - If it is a sequence, it should be [min, max] where -0.5 <= min <= max <= 0.5. - - Examples: - >>> decode_op = c_vision.Decode() - >>> transform_op = c_vision.RandomColorAdjust(brightness=(0.5, 1), - ... contrast=(0.4, 1), - ... saturation=(0.3, 1)) - >>> transforms_list = [decode_op, transform_op] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... 
input_columns=["image"]) - """ - - @check_random_color_adjust - def __init__(self, brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)): - brightness = self.expand_values(brightness) - contrast = self.expand_values(contrast) - saturation = self.expand_values(saturation) - hue = self.expand_values(hue, center=0, bound=(-0.5, 0.5), non_negative=False) - - self.brightness = brightness - self.contrast = contrast - self.saturation = saturation - self.hue = hue - - def expand_values(self, value, center=1, bound=(0, FLOAT_MAX_INTEGER), non_negative=True): - if isinstance(value, numbers.Number): - value = [center - value, center + value] - if non_negative: - value[0] = max(0, value[0]) - check_range(value, bound) - return (value[0], value[1]) - - def parse(self): - return cde.RandomColorAdjustOperation(self.brightness, self.contrast, self.saturation, self.hue) - - -class RandomRotation(ImageTensorOperation): - """ - Rotate the input image by a random angle. - - Args: - degrees (Union[int, float, sequence): Range of random rotation degrees. - If degrees is a number, the range will be converted to (-degrees, degrees). - If degrees is a sequence, it should be (min, max). - resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST). - If omitted, or if the image has mode "1" or "P", it is set to be Inter.NEAREST. + scale (tuple, optional): Range (min, max) of respective size of the original + size to be cropped (default=(0.08, 1.0)). + ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped + (default=(3. / 4., 4. / 3.)). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR). It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC]. - - Inter.BILINEAR, means resample method is bilinear interpolation. + - Inter.BILINEAR, means interpolation method is bilinear interpolation. - - Inter.NEAREST, means resample method is nearest-neighbor interpolation. + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. - - Inter.BICUBIC, means resample method is bicubic interpolation. + - Inter.BICUBIC, means interpolation method is bicubic interpolation. - expand (bool, optional): Optional expansion flag (default=False). If set to True, expand the output - image to make it large enough to hold the entire rotated image. - If set to False or omitted, make the output image the same size as the input. - Note that the expand flag assumes rotation around the center and no translation. - center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None). - Origin is the top left corner. None sets to the center of the image. - fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image - (default=0). - If it is a 3-tuple, it is used for R, G, B channels respectively. - If it is an integer, it is used for all RGB channels. + max_attempts (int, optional): The maximum number of attempts to propose a valid + crop area (default=10). If exceeded, fall back to use center crop instead. Examples: >>> from mindspore.dataset.vision import Inter - >>> transforms_list = [c_vision.Decode(), - ... c_vision.RandomRotation(degrees=5.0, - ... resample=Inter.NEAREST, - ... expand=True)] + >>> decode_op = c_vision.Decode() + >>> bbox_op = c_vision.RandomResizedCropWithBBox(size=50, interpolation=Inter.NEAREST) + >>> transforms_list = [decode_op, bbox_op] >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, ... 
input_columns=["image"]) """ - @check_random_rotation - def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0): - self.degrees = degrees - self.resample = resample - self.expand = expand - self.center = center - self.fill_value = fill_value + @check_random_resize_crop + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation=Inter.BILINEAR, max_attempts=10): + if isinstance(size, int): + size = (size, size) + self.size = size + self.scale = scale + self.ratio = ratio + self.interpolation = interpolation + self.max_attempts = max_attempts def parse(self): - degrees = (-self.degrees, self.degrees) if isinstance(self.degrees, numbers.Number) else self.degrees - interpolation = DE_C_INTER_MODE[self.resample] - expand = self.expand - center = (-1, -1) if self.center is None else self.center - fill_value = tuple([self.fill_value] * 3) if isinstance(self.fill_value, int) else self.fill_value - return cde.RandomRotationOperation(degrees, interpolation, expand, center, fill_value) - - -class Rescale(ImageTensorOperation): - """ - Tensor operation to rescale the input image. - - Args: - rescale (float): Rescale factor. - shift (float): Shift factor. - - Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_rescale - def __init__(self, rescale, shift): - self.rescale = rescale - self.shift = shift - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image to be rescaled. - - Returns: - img (NumPy), Rescaled image. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - rescale = cde.Execute(cde.RescaleOperation(self.rescale, self.shift)) - img = rescale(cde.Tensor(np.asarray(img))) - return img.as_array() - - def parse(self): - return cde.RescaleOperation(self.rescale, self.shift) + return cde.RandomResizedCropWithBBoxOperation(self.size, self.scale, self.ratio, + DE_C_INTER_MODE[self.interpolation], self.max_attempts) class RandomResize(ImageTensorOperation): @@ -1220,191 +1077,60 @@ class RandomResizeWithBBox(ImageTensorOperation): return cde.RandomResizeWithBBoxOperation(size) -class HWC2CHW(ImageTensorOperation): +class RandomRotation(ImageTensorOperation): """ - Transpose the input image; shape (H, W, C) to shape (C, H, W). - - Examples: - >>> transforms_list = [c_vision.Decode(), - ... c_vision.RandomHorizontalFlip(0.75), - ... c_vision.RandomCrop(512), - ... c_vision.HWC2CHW()] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image array, of shape (H, W, C), to have channels swapped. - - Returns: - img (NumPy), Image array, of shape (C, H, W), with channels swapped. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - hwc2chw = cde.Execute(cde.HwcToChwOperation()) - img = hwc2chw(cde.Tensor(np.asarray(img))) - return img.as_array() - - def parse(self): - return cde.HwcToChwOperation() - - -class RandomCropDecodeResize(ImageTensorOperation): - """ - Equivalent to RandomResizedCrop, but crops before decodes. + Rotate the input image by a random angle. 
     Args:
-        size (Union[int, sequence]): The size of the output image.
-            If size is an integer, a square crop of size (size, size) is returned.
-            If size is a sequence of length 2, it should be (height, width).
-        scale (tuple, optional): Range [min, max) of respective size of the
-            original size to be cropped (default=(0.08, 1.0)).
-        ratio (tuple, optional): Range [min, max) of aspect ratio to be
-            cropped (default=(3. / 4., 4. / 3.)).
-        interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
+        degrees (Union[int, float, sequence]): Range of random rotation degrees.
+            If degrees is a number, the range will be converted to (-degrees, degrees).
+            If degrees is a sequence, it should be (min, max).
+        resample (Inter mode, optional): An optional resampling filter (default=Inter.NEAREST).
+            If omitted, or if the image has mode "1" or "P", it is set to be Inter.NEAREST.
             It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
 
-            - Inter.BILINEAR, means interpolation method is bilinear interpolation.
+            - Inter.BILINEAR, means resample method is bilinear interpolation.
 
-            - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
+            - Inter.NEAREST, means resample method is nearest-neighbor interpolation.
 
-            - Inter.BICUBIC, means interpolation method is bicubic interpolation.
+            - Inter.BICUBIC, means resample method is bicubic interpolation.
 
-        max_attempts (int, optional): The maximum number of attempts to propose a valid crop_area (default=10).
-            If exceeded, fall back to use center_crop instead.
+        expand (bool, optional): Optional expansion flag (default=False). If set to True, expand the output
+            image to make it large enough to hold the entire rotated image.
+            If set to False or omitted, make the output image the same size as the input.
+            Note that the expand flag assumes rotation around the center and no translation.
+        center (tuple, optional): Optional center of rotation (a 2-tuple) (default=None).
+            Origin is the top left corner. None sets to the center of the image.
+        fill_value (Union[int, tuple], optional): Optional fill color for the area outside the rotated image
+            (default=0).
+            If it is a 3-tuple, it is used for R, G, B channels respectively.
+            If it is an integer, it is used for all RGB channels.
 
     Examples:
         >>> from mindspore.dataset.vision import Inter
-        >>> resize_crop_decode_op = c_vision.RandomCropDecodeResize(size=(50, 75),
-        ...                                                         scale=(0.25, 0.5),
-        ...                                                         interpolation=Inter.NEAREST,
-        ...                                                         max_attempts=5)
-        >>> transforms_list = [resize_crop_decode_op]
+        >>> transforms_list = [c_vision.Decode(),
+        ...                    c_vision.RandomRotation(degrees=5.0,
+        ...                                            resample=Inter.NEAREST,
+        ...                                            expand=True)]
         >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
         ...                                                 input_columns=["image"])
     """
 
-    @check_random_resize_crop
-    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
-                 interpolation=Inter.BILINEAR, max_attempts=10):
-        if isinstance(size, int):
-            size = (size, size)
-        self.size = size
-        self.scale = scale
-        self.ratio = ratio
-        self.interpolation = interpolation
-        self.max_attempts = max_attempts
-
-    def parse(self):
-        return cde.RandomCropDecodeResizeOperation(self.size, self.scale, self.ratio,
-                                                   DE_C_INTER_MODE[self.interpolation],
-                                                   self.max_attempts)
-
-
-class Pad(ImageTensorOperation):
-    """
-    Pads the image according to padding parameters.
-
-    Args:
-        padding (Union[int, sequence]): The number of pixels to pad the image.
-            If a single number is provided, it pads all borders with this value.
- If a tuple or list of 2 values are provided, it pads the (left and top) - with the first value and (right and bottom) with the second value. - If 4 values are provided as a list or tuple, - it pads the left, top, right and bottom respectively. - fill_value (Union[int, tuple], optional): The pixel intensity of the borders, only valid for - padding_mode Border.CONSTANT (default=0). - If it is an integer, it is used for all RGB channels. - If it is a 3-tuple, it is used to fill R, G, B channels respectively. - The fill_value values must be in range [0, 255]. - padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of - [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC]. - - - Border.CONSTANT, means it fills the border with constant values. - - - Border.EDGE, means it pads with the last value on the edge. - - - Border.REFLECT, means it reflects the values on the edge omitting the last - value of edge. - - - Border.SYMMETRIC, means it reflects the values on the edge repeating the last - value of edge. - - Examples: - >>> from mindspore.dataset.vision import Border - >>> transforms_list = [c_vision.Decode(), c_vision.Pad([100, 100, 100, 100])] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, - ... input_columns=["image"]) - """ - - @check_pad - def __init__(self, padding, fill_value=0, padding_mode=Border.CONSTANT): - padding = parse_padding(padding) - if isinstance(fill_value, int): - fill_value = tuple([fill_value] * 3) - self.padding = padding + @check_random_rotation + def __init__(self, degrees, resample=Inter.NEAREST, expand=False, center=None, fill_value=0): + self.degrees = degrees + self.resample = resample + self.expand = expand + self.center = center self.fill_value = fill_value - self.padding_mode = padding_mode def parse(self): - return cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode]) - - def __call__(self, img): - """ - Call method. - - Args: - img (NumPy or PIL image): Image to be padded. - - Returns: - img (NumPy), Padded image. - """ - if not isinstance(img, (np.ndarray, Image.Image)): - raise TypeError("Input should be NumPy or PIL image, got {}.".format(type(img))) - pad = cde.Execute(cde.PadOperation(self.padding, self.fill_value, DE_C_BORDER_TYPE[self.padding_mode])) - img = pad(cde.Tensor(np.asarray(img))) - return img.as_array() - - -class UniformAugment(ImageTensorOperation): - """ - Tensor operation to perform randomly selected augmentation. - - Args: - transforms: List of C++ operations (Python operations are not accepted). - num_ops (int, optional): Number of operations to be selected and applied (default=2). - - Examples: - >>> import mindspore.dataset.vision.py_transforms as py_vision - >>> transforms_list = [c_vision.RandomHorizontalFlip(), - ... c_vision.RandomVerticalFlip(), - ... c_vision.RandomColorAdjust(), - ... c_vision.RandomRotation(degrees=45)] - >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2) - >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]), - ... uni_aug_op, py_vision.ToTensor()] - >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all, - ... input_columns="image", - ... 
num_parallel_workers=1) - """ - - @check_uniform_augment_cpp - def __init__(self, transforms, num_ops=2): - self.transforms = transforms - self.num_ops = num_ops - - def parse(self): - transforms = [] - for op in self.transforms: - if op and getattr(op, 'parse', None): - transforms.append(op.parse()) - else: - transforms.append(op) - return cde.UniformAugOperation(transforms, self.num_ops) + degrees = (-self.degrees, self.degrees) if isinstance(self.degrees, numbers.Number) else self.degrees + interpolation = DE_C_INTER_MODE[self.resample] + expand = self.expand + center = (-1, -1) if self.center is None else self.center + fill_value = tuple([self.fill_value] * 3) if isinstance(self.fill_value, int) else self.fill_value + return cde.RandomRotationOperation(degrees, interpolation, expand, center, fill_value) class RandomSelectSubpolicy(ImageTensorOperation): @@ -1446,44 +1172,199 @@ class RandomSelectSubpolicy(ImageTensorOperation): return cde.RandomSelectSubpolicyOperation(policy) -class SoftDvppDecodeResizeJpeg(ImageTensorOperation): +class RandomSharpness(ImageTensorOperation): """ - Tensor operation to decode and resize JPEG image using the simulation algorithm of - Ascend series chip DVPP module. + Adjust the sharpness of the input image by a fixed or random degree. Degree of 0.0 gives a blurred image, + degree of 1.0 gives the original image, and degree of 2.0 gives a sharpened image. - It is recommended to use this algorithm in the following scenarios: - When training, the DVPP of the Ascend chip is not used, - and the DVPP of the Ascend chip is used during inference, - and the accuracy of inference is lower than the accuracy of training; - and the input image size should be in range [32*32, 8192*8192]. - The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16]. - Only images with an even resolution can be output. The output of odd resolution is not supported. + Args: + degrees (tuple, optional): Range of random sharpness adjustment degrees. It should be in (min, max) format. + If min=max, then it is a single fixed magnitude operation (default = (0.1, 1.9)). + + Raises: + TypeError : If degrees is not a list or tuple. + ValueError: If degrees is negative. + ValueError: If degrees is in (max, min) format instead of (min, max). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomSharpness(degrees=(0.2, 1.9))] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_positive_degrees + def __init__(self, degrees=(0.1, 1.9)): + self.degrees = degrees + + def parse(self): + return cde.RandomSharpnessOperation(self.degrees) + + +class RandomSolarize(ImageTensorOperation): + """ + Invert all pixel values above a threshold. + + Args: + threshold (tuple, optional): Range of random solarize threshold. Threshold values should always be + in the range (0, 255), include at least one integer value in the given range and + be in (min, max) format. If min=max, then it is a single fixed magnitude operation (default=(0, 255)). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... 
input_columns=["image"]) + """ + + @check_random_solarize + def __init__(self, threshold=(0, 255)): + self.threshold = threshold + + def parse(self): + return cde.RandomSolarizeOperation(self.threshold) + + +class RandomVerticalFlip(ImageTensorOperation): + """ + Flip the input image vertically, randomly with a given probability. + + Args: + prob (float, optional): Probability of the image being flipped (default=0.5). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlip(0.25)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_prob + def __init__(self, prob=0.5): + self.prob = prob + + def parse(self): + return cde.RandomVerticalFlipOperation(self.prob) + + +class RandomVerticalFlipWithBBox(ImageTensorOperation): + """ + Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly. + + Args: + prob (float, optional): Probability of the image being flipped (default=0.5). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.RandomVerticalFlipWithBBox(0.20)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_prob + def __init__(self, prob=0.5): + self.prob = prob + + def parse(self): + return cde.RandomVerticalFlipWithBBoxOperation(self.prob) + + +class Rescale(ImageTensorOperation): + """ + Tensor operation to rescale the input image. + + Args: + rescale (float): Rescale factor. + shift (float): Shift factor. + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.Rescale(1.0 / 255.0, -1.0)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_rescale + def __init__(self, rescale, shift): + self.rescale = rescale + self.shift = shift + + def parse(self): + return cde.RescaleOperation(self.rescale, self.shift) + + +class Resize(ImageTensorOperation): + """ + Resize the input image to the given size. + + Args: + size (Union[int, sequence]): The output size of the resized image. + If size is an integer, the smaller edge of the image will be resized to this value with + the same image aspect ratio. + If size is a sequence of length 2, it should be (height, width). + interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR). + It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC]. + + - Inter.LINEAR, means interpolation method is bilinear interpolation. + + - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation. + + - Inter.BICUBIC, means interpolation method is bicubic interpolation. + + - Inter.AREA, means interpolation method is pixel area interpolation. + + Examples: + >>> from mindspore.dataset.vision import Inter + >>> decode_op = c_vision.Decode() + >>> resize_op = c_vision.Resize([100, 75], Inter.BICUBIC) + >>> transforms_list = [decode_op, resize_op] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + + @check_resize_interpolation + def __init__(self, size, interpolation=Inter.LINEAR): + if isinstance(size, int): + size = (size,) + self.size = size + self.interpolation = interpolation + + def parse(self): + return cde.ResizeOperation(self.size, DE_C_INTER_MODE[self.interpolation]) + + +class ResizeWithBBox(ImageTensorOperation): + """ + Resize the input image to the given size and adjust bounding boxes accordingly. 
     Args:
         size (Union[int, sequence]): The output size of the resized image.
             If size is an integer, smaller edge of the image will be resized to this value with
             the same image aspect ratio.
             If size is a sequence of length 2, it should be (height, width).
+        interpolation (Inter mode, optional): Image interpolation mode (default=Inter.LINEAR).
+            It can be any of [Inter.LINEAR, Inter.NEAREST, Inter.BICUBIC].
+
+            - Inter.LINEAR, means interpolation method is bilinear interpolation.
+
+            - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
+
+            - Inter.BICUBIC, means interpolation method is bicubic interpolation.
 
     Examples:
-        >>> # decode and resize image, keeping aspect ratio
-        >>> transforms_list1 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg(70)]
-        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1,
+        >>> from mindspore.dataset.vision import Inter
+        >>> decode_op = c_vision.Decode()
+        >>> bbox_op = c_vision.ResizeWithBBox(50, Inter.NEAREST)
+        >>> transforms_list = [decode_op, bbox_op]
+        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
         ...                                                 input_columns=["image"])
-        >>> # decode and resize to portrait style
-        >>> transforms_list2 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg((80, 60))]
-        >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2,
-        ...                                                     input_columns=["image"])
     """
 
-    @check_resize
-    def __init__(self, size):
-        if isinstance(size, int):
-            size = (size,)
+    @check_resize_interpolation
+    def __init__(self, size, interpolation=Inter.LINEAR):
         self.size = size
+        self.interpolation = interpolation
 
     def parse(self):
-        return cde.SoftDvppDecodeResizeJpegOperation(self.size)
+        size = self.size
+        if isinstance(size, int):
+            size = (size,)
+        return cde.ResizeWithBBoxOperation(size, DE_C_INTER_MODE[self.interpolation])
 
 
 class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation):
@@ -1531,24 +1412,78 @@ class SoftDvppDecodeRandomCropResizeJpeg(ImageTensorOperation):
         return cde.SoftDvppDecodeRandomCropResizeJpegOperation(self.size, self.scale, self.ratio, self.max_attempts)
 
 
-class RandomSolarize(ImageTensorOperation):
+class SoftDvppDecodeResizeJpeg(ImageTensorOperation):
     """
-    Invert all pixel values above a threshold.
+    Tensor operation to decode and resize JPEG image using the simulation algorithm of
+    Ascend series chip DVPP module.
+
+    It is recommended to use this algorithm in the following scenarios:
+    When training, the DVPP of the Ascend chip is not used,
+    and the DVPP of the Ascend chip is used during inference,
+    and the accuracy of inference is lower than the accuracy of training;
+    and the input image size should be in range [32*32, 8192*8192].
+    The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
+    Only images with an even resolution can be output. The output of odd resolution is not supported.
 
     Args:
-        threshold (tuple, optional): Range of random solarize threshold. Threshold values should always be
-            in the range (0, 255), include at least one integer value in the given range and
-            be in (min, max) format. If min=max, then it is a single fixed magnitude operation (default=(0, 255)).
+        size (Union[int, sequence]): The output size of the resized image.
+            If size is an integer, smaller edge of the image will be resized to this value with
+            the same image aspect ratio.
+            If size is a sequence of length 2, it should be (height, width).
Examples: - >>> transforms_list = [c_vision.Decode(), c_vision.RandomSolarize(threshold=(10,100))] - >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + >>> # decode and resize image, keeping aspect ratio + >>> transforms_list1 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg(70)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list1, ... input_columns=["image"]) + >>> # decode and resize to portrait style + >>> transforms_list2 = [c_vision.Decode(), c_vision.SoftDvppDecodeResizeJpeg((80, 60))] + >>> image_folder_dataset_1 = image_folder_dataset_1.map(operations=transforms_list2, + ... input_columns=["image"]) """ - @check_random_solarize - def __init__(self, threshold=(0, 255)): - self.threshold = threshold + @check_resize + def __init__(self, size): + if isinstance(size, int): + size = (size,) + self.size = size def parse(self): - return cde.RandomSolarizeOperation(self.threshold) + return cde.SoftDvppDecodeResizeJpegOperation(self.size) + + +class UniformAugment(ImageTensorOperation): + """ + Tensor operation to perform randomly selected augmentation. + + Args: + transforms: List of C++ operations (Python operations are not accepted). + num_ops (int, optional): Number of operations to be selected and applied (default=2). + + Examples: + >>> import mindspore.dataset.vision.py_transforms as py_vision + >>> transforms_list = [c_vision.RandomHorizontalFlip(), + ... c_vision.RandomVerticalFlip(), + ... c_vision.RandomColorAdjust(), + ... c_vision.RandomRotation(degrees=45)] + >>> uni_aug_op = c_vision.UniformAugment(transforms=transforms_list, num_ops=2) + >>> transforms_all = [c_vision.Decode(), c_vision.Resize(size=[224, 224]), + ... uni_aug_op, py_vision.ToTensor()] + >>> image_folder_dataset_1 = image_folder_dataset.map(operations=transforms_all, + ... input_columns="image", + ... num_parallel_workers=1) + """ + + @check_uniform_augment_cpp + def __init__(self, transforms, num_ops=2): + self.transforms = transforms + self.num_ops = num_ops + + def parse(self): + transforms = [] + for op in self.transforms: + if op and getattr(op, 'parse', None): + transforms.append(op.parse()) + else: + transforms.append(op) + return cde.UniformAugOperation(transforms, self.num_ops) diff --git a/tests/ut/python/dataset/test_HWC2CHW.py b/tests/ut/python/dataset/test_HWC2CHW.py index ac5936ad0ea..612dc34e275 100644 --- a/tests/ut/python/dataset/test_HWC2CHW.py +++ b/tests/ut/python/dataset/test_HWC2CHW.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
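
The callable tests added below exercise the decoupled eager path this patch introduces: a C vision op instance is applied directly to a NumPy array, with no dataset pipeline in between. A minimal sketch of that usage pattern, assuming a local JPEG file ("sample.jpg" is a placeholder path, not part of this patch):

    import numpy as np
    import mindspore.dataset.vision.c_transforms as c_vision

    # Eager-mode sketch: each op is called directly on the previous op's output.
    raw = np.fromfile("sample.jpg", dtype=np.uint8)   # encoded JPEG bytes
    img = c_vision.Decode()(raw)                      # decoded HWC uint8 array
    img = c_vision.Resize([100, 75])(img)             # resize to height 100, width 75
    img = c_vision.HWC2CHW()(img)                     # channels-first layout
    print(img.shape)                                  # expected (3, 100, 75)
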
@@ -29,6 +29,20 @@ DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" +def test_HWC2CHW_callable(): + """ + Test HWC2CHW is callable + """ + logger.info("Test HWC2CHW callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + img = c_vision.Decode()(img) + img = c_vision.HWC2CHW()(img) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + assert img.shape == (3, 2268, 4032) + + def test_HWC2CHW(plot=False): """ Test HWC2CHW @@ -122,6 +136,7 @@ def test_HWC2CHW_comp(plot=False): if __name__ == '__main__': + test_HWC2CHW_callable() test_HWC2CHW(True) test_HWC2CHW_md5() test_HWC2CHW_comp(True) diff --git a/tests/ut/python/dataset/test_compose.py b/tests/ut/python/dataset/test_compose.py index db86b1cbebd..10bcfe189d4 100644 --- a/tests/ut/python/dataset/test_compose.py +++ b/tests/ut/python/dataset/test_compose.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -219,7 +219,7 @@ def test_c_py_compose_vision_module(plot=False, run_golden=True): def test_py_transforms_with_c_vision(): """ - These examples will fail, as py_transforms.Random(Apply/Choice/Order) expect callable functions + These examples will fail, as c_transform should not be used in py_transforms.Random(Apply/Choice/Order) """ ds.config.set_seed(0) @@ -236,15 +236,15 @@ def test_py_transforms_with_c_vision(): with pytest.raises(ValueError) as error_info: test_config(py_transforms.RandomApply([c_vision.RandomResizedCrop(200)])) - assert "transforms[0] is not callable." in str(error_info.value) + assert "transforms[0] is not a py transforms." in str(error_info.value) with pytest.raises(ValueError) as error_info: test_config(py_transforms.RandomChoice([c_vision.RandomResizedCrop(200)])) - assert "transforms[0] is not callable." in str(error_info.value) + assert "transforms[0] is not a py transforms." in str(error_info.value) with pytest.raises(ValueError) as error_info: test_config(py_transforms.RandomOrder([np.array, c_vision.RandomResizedCrop(200)])) - assert "transforms[1] is not callable." in str(error_info.value) + assert "transforms[1] is not a py transforms." in str(error_info.value) with pytest.raises(RuntimeError) as error_info: test_config([py_transforms.OneHotOp(20, 0.1)]) diff --git a/tests/ut/python/dataset/test_invert.py b/tests/ut/python/dataset/test_invert.py index 07a4d5bc6f7..3387427bc77 100644 --- a/tests/ut/python/dataset/test_invert.py +++ b/tests/ut/python/dataset/test_invert.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
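
For intuition, the shape assertion in test_HWC2CHW_callable above corresponds to a plain channel transpose. The sketch below is only a NumPy illustration of the layout change, not the operator's implementation:

    import numpy as np

    hwc = np.zeros((2268, 4032, 3), dtype=np.uint8)   # height, width, channels
    chw = np.transpose(hwc, (2, 0, 1))                # channels, height, width
    assert chw.shape == (3, 2268, 4032)
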
@@ -29,6 +29,21 @@ DATA_DIR = "../data/dataset/testImageNetData/train/" GENERATE_GOLDEN = False +def test_invert_callable(): + """ + Test Invert is callable + """ + logger.info("Test Invert callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + img = C.Decode()(img) + img = C.Invert()(img) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + assert img.shape == (2268, 4032, 3) + + def test_invert_py(plot=False): """ Test Invert python op @@ -247,6 +262,7 @@ def test_invert_md5_c(): if __name__ == "__main__": + test_invert_callable() test_invert_py(plot=False) test_invert_c(plot=False) test_invert_py_c(plot=False) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index b4799a71c83..1fde552361a 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,6 +34,22 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" GENERATE_GOLDEN = False +def test_random_crop_and_resize_callable(): + """ + Test RandomCropAndResize op is callable + """ + logger.info("test_random_crop_and_resize_callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + decode_op = c_vision.Decode() + img = decode_op(img) + + random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)) + img = random_crop_and_resize_op(img) + assert np.shape(img) == (256, 512, 3) + + def test_random_crop_and_resize_op_c(plot=False): """ Test RandomCropAndResize op in c transforms @@ -389,6 +405,7 @@ def test_random_crop_and_resize_06(): if __name__ == "__main__": + test_random_crop_and_resize_callable() test_random_crop_and_resize_op_c(True) test_random_crop_and_resize_op_py(True) test_random_crop_and_resize_op_py_ANTIALIAS() diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py index 21a9c611be4..f13128316ea 100644 --- a/tests/ut/python/dataset/test_text_jieba_tokenizer.py +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
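
The Invert check in test_invert_callable above runs on a decoded 8-bit RGB array; pixel-wise, inversion amounts to subtracting each channel value from 255. A rough NumPy illustration (not the operator's code path):

    import numpy as np

    pixel = np.array([[[0, 128, 255]]], dtype=np.uint8)   # one RGB pixel
    inverted = 255 - pixel                                # invert each 8-bit channel
    assert inverted.tolist() == [[[255, 127, 0]]]
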
@@ -16,6 +16,7 @@ import numpy as np import mindspore.dataset as ds from mindspore.dataset.text import JiebaTokenizer from mindspore.dataset.text import JiebaMode, to_str +from mindspore import log as logger DATA_FILE = "../data/dataset/testJiebaDataset/3.txt" DATA_ALL_FILE = "../data/dataset/testJiebaDataset/*" @@ -24,6 +25,23 @@ HMM_FILE = "../data/dataset/jiebadict/hmm_model.utf8" MP_FILE = "../data/dataset/jiebadict/jieba.dict.utf8" +def test_jieba_callable(): + """ + Test jieba tokenizer op is callable + """ + logger.info("test_jieba_callable") + jieba_op1 = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP) + jieba_op2 = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.HMM) + + text1 = "今天天气太好了我们一起去外面玩吧" + text2 = "男默女泪市长江大桥" + assert np.array_equal(jieba_op1(text1), ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧']) + assert np.array_equal(jieba_op2(text1), ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧']) + + jieba_op1.add_word("男默女泪") + assert np.array_equal(jieba_op1(text2), ['男默女泪', '市', '长江大桥']) + + def test_jieba_1(): """Test jieba tokenizer with MP mode""" data = ds.TextFileDataset(DATA_FILE) @@ -457,6 +475,7 @@ def test_jieba_6(): if __name__ == "__main__": + test_jieba_callable() test_jieba_1() test_jieba_1_1() test_jieba_1_2() diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index c8548cf82c6..1adcc4a9918 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,6 +28,24 @@ from util import visualize_list, diff_mse DATA_DIR = "../data/dataset/testImageNetData/train/" +def test_uniform_augment_callable(num_ops=2): + """ + Test UniformAugment is callable + """ + logger.info("test_uniform_augment_callable") + img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8) + logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape)) + + decode_op = C.Decode() + img = decode_op(img) + + transforms_ua = [C.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32]), + C.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32])] + uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops) + img = uni_aug([img, img]) + assert ((np.shape(img) == (2, 2268, 4032, 3)) or (np.shape(img) == (1, 400, 400, 3))) + + def test_uniform_augment(plot=False, num_ops=2): """ Test UniformAugment @@ -262,6 +280,7 @@ def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): if __name__ == "__main__": + test_uniform_augment_callable(num_ops=2) test_uniform_augment(num_ops=1, plot=True) test_cpp_uniform_augment(num_ops=1, plot=True) test_cpp_uniform_augment_exception_pyops(num_ops=1) diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index f9032b3e545..a6818ac2e7e 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
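
test_uniform_augment_callable above passes a Python list of images to a single op call, which is the multi-tensor eager entry point added by this patch. A condensed sketch of that calling convention ("sample.jpg" is again a placeholder path):

    import numpy as np
    import mindspore.dataset.vision.c_transforms as c_vision

    raw = np.fromfile("sample.jpg", dtype=np.uint8)
    img = c_vision.Decode()(raw)

    crops = [c_vision.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32]),
             c_vision.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32])]
    uni_aug = c_vision.UniformAugment(transforms=crops, num_ops=2)
    out = uni_aug([img, img])          # a list of tensors goes through one eager call
    print(np.shape(out))
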
@@ -18,6 +18,7 @@ import numpy as np
 import mindspore.dataset as ds
 import mindspore.dataset.text as text
 import mindspore.common.dtype as mstype
+from mindspore import log as logger
 
 # this file contains "home is behind the world head" each word is 1 line
 DATA_FILE = "../data/dataset/testVocab/words.txt"
@@ -25,6 +26,16 @@ VOCAB_FILE = "../data/dataset/testVocab/vocab_list.txt"
 SIMPLE_VOCAB_FILE = "../data/dataset/testVocab/simple_vocab_list.txt"
 
 
+def test_lookup_callable():
+    """
+    Test lookup is callable
+    """
+    logger.info("test_lookup_callable")
+    vocab = text.Vocab.from_list(['深', '圳', '欢', '迎', '您'])
+    lookup = text.Lookup(vocab)
+    word = "迎"
+    assert lookup(word) == 3
+
 def test_from_list_tutorial():
     vocab = text.Vocab.from_list("home IS behind the world ahead !".split(" "), ["<pad>", "<unk>"], True)
     lookup = text.Lookup(vocab, "<unk>")
@@ -171,6 +182,7 @@ def test_lookup_cast_type():
 
 
 if __name__ == '__main__':
+    test_lookup_callable()
     test_from_dict_exception()
     test_from_list_tutorial()
     test_from_file_tutorial()
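
The text ops follow the same decoupled pattern: test_lookup_callable above maps a single word straight to its id. A small sketch extending it with special tokens, mirroring test_from_list_tutorial and assuming special_first=True places <pad> and <unk> at ids 0 and 1:

    import mindspore.dataset.text as text

    vocab = text.Vocab.from_list(["home", "is", "behind"], ["<pad>", "<unk>"], True)
    lookup = text.Lookup(vocab, "<unk>")   # out-of-vocabulary words fall back to <unk>
    print(lookup("home"))                  # 2: the two special tokens take ids 0 and 1
    print(lookup("world"))                 # 1: the id of <unk>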