diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc index afd22f380ff..3e22fd240e4 100644 --- a/mindspore/ccsrc/minddata/dataset/api/transforms.cc +++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc @@ -19,6 +19,7 @@ // Kernel data headers (in alphabetical order) #include "minddata/dataset/kernels/data/one_hot_op.h" +#include "minddata/dataset/kernels/data/type_cast_op.h" namespace mindspore { namespace dataset { @@ -42,6 +43,16 @@ std::shared_ptr OneHot(int32_t num_classes) { return op; } +// Function to create TypeCastOperation; returns nullptr when data_type fails validation. +std::shared_ptr TypeCast(std::string data_type) { + auto op = std::make_shared(data_type); + // Input validation: reject unsupported type names before handing the op to the caller + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + /* ####################################### Validator Functions ############################################ */ /* ####################################### Derived TensorOperation classes ################################# */ @@ -62,6 +73,24 @@ bool OneHotOperation::ValidateParams() { std::shared_ptr OneHotOperation::Build() { return std::make_shared(num_classes_); } +// TypeCastOperation: holds the requested target type name +TypeCastOperation::TypeCastOperation(std::string data_type) : data_type_(data_type) {} + +bool TypeCastOperation::ValidateParams() { + std::vector predefine_type = {"bool", "int8", "uint8", "int16", "uint16", "int32", "uint32", + "int64", "uint64", "float16", "float32", "float64", "string"}; + auto itr = std::find(predefine_type.begin(), predefine_type.end(), data_type_); + if (itr == predefine_type.end()) { + MS_LOG(ERROR) << "TypeCast: Only support type bool, int8, uint8, int16, uint16, int32, uint32, " + << "int64, uint64, float16, float32, float64, string, but got " << data_type_; + return false; + } + + return true; +} + +std::shared_ptr TypeCastOperation::Build() { return std::make_shared(data_type_); } + } // namespace transforms } // namespace api } // namespace dataset diff --git 
a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index 7972142406c..dda6ff74541 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -32,6 +32,7 @@ #include "minddata/dataset/kernels/image/random_color_op.h" #include "minddata/dataset/kernels/image/random_color_adjust_op.h" #include "minddata/dataset/kernels/image/random_crop_op.h" +#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" #include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" #include "minddata/dataset/kernels/image/random_posterize_op.h" #include "minddata/dataset/kernels/image/random_rotation_op.h" @@ -200,6 +201,20 @@ std::shared_ptr RandomCrop(std::vector size, std:: return op; } +// Function to create RandomCropDecodeResizeOperation; returns nullptr when parameter validation fails. +std::shared_ptr RandomCropDecodeResize(std::vector size, + std::vector scale, + std::vector ratio, + InterpolationMode interpolation, + int32_t max_attempts) { + auto op = std::make_shared(size, scale, ratio, interpolation, max_attempts); + // Input validation: an op that fails ValidateParams() is never returned to the caller + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + // Function to create RandomHorizontalFlipOperation. 
std::shared_ptr RandomHorizontalFlip(float prob) { auto op = std::make_shared(prob); @@ -784,6 +799,66 @@ std::shared_ptr RandomCropOperation::Build() { return tensor_op; } +// RandomCropDecodeResizeOperation: validates size/scale/ratio/max_attempts and builds the op +RandomCropDecodeResizeOperation::RandomCropDecodeResizeOperation(std::vector size, std::vector scale, + std::vector ratio, + InterpolationMode interpolation, int32_t max_attempts) + : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + +bool RandomCropDecodeResizeOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "RandomCropDecodeResize: size vector has incorrect size: " << size_.size(); + return false; + } + + if (scale_.empty() || scale_.size() != 2) { + MS_LOG(ERROR) << "RandomCropDecodeResize: scale vector has incorrect size: " << scale_.size(); + return false; + } + + if (scale_[0] > scale_[1]) { + MS_LOG(ERROR) << "RandomCropDecodeResize: scale should be in (min,max) format. Got (max,min)."; + return false; + } + + if (ratio_.empty() || ratio_.size() != 2) { + MS_LOG(ERROR) << "RandomCropDecodeResize: ratio vector has incorrect size: " << ratio_.size(); + return false; + } + + if (ratio_[0] > ratio_[1]) { + MS_LOG(ERROR) << "RandomCropDecodeResize: ratio should be in (min,max) format. Got (max,min)."; + return false; + } + + if (max_attempts_ < 1) { + MS_LOG(ERROR) << "RandomCropDecodeResize: max_attempts must be greater than or equal to 1."; + return false; + } + return true; +} + +std::shared_ptr RandomCropDecodeResizeOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = size_[0]; + + // User has specified the crop_width value; otherwise the crop stays square (width == height). 
+ if (size_.size() == 2) { + crop_width = size_[1]; + } + + float scale_lower_bound = scale_[0]; + float scale_upper_bound = scale_[1]; + + float aspect_lower_bound = ratio_[0]; + float aspect_upper_bound = ratio_[1]; + + auto tensor_op = + std::make_shared(crop_height, crop_width, scale_lower_bound, scale_upper_bound, + aspect_lower_bound, aspect_upper_bound, interpolation_, max_attempts_); + return tensor_op; +} + // RandomHorizontalFlipOperation RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {} diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h index 925a40148ea..986c557cbf8 100644 --- a/mindspore/ccsrc/minddata/dataset/include/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -19,6 +19,7 @@ #include #include +#include #include "minddata/dataset/core/constants.h" namespace mindspore { @@ -48,6 +49,7 @@ namespace transforms { // Transform Op classes (in alphabetical order) class OneHotOperation; +class TypeCastOperation; /// \brief Function to create a OneHot TensorOperation. /// \notes Convert the labels into OneHot format. @@ -55,6 +57,12 @@ class OneHotOperation; /// \return Shared pointer to the current TensorOperation. std::shared_ptr OneHot(int32_t num_classes); +/// \brief Function to create a TypeCast TensorOperation. +/// \notes Tensor operation to cast to a given MindSpore data type. +/// \param[in] data_type Name of the target data type to cast to, e.g. "int32" or "float32". +/// \return Shared pointer to the current TensorOperation, or nullptr if data_type is not supported. 
+std::shared_ptr TypeCast(std::string data_type); + /* ####################################### Derived TensorOperation classes ################################# */ class OneHotOperation : public TensorOperation { @@ -70,6 +78,20 @@ class OneHotOperation : public TensorOperation { private: float num_classes_; }; + +class TypeCastOperation : public TensorOperation { + public: + explicit TypeCastOperation(std::string data_type); + + ~TypeCastOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::string data_type_; +}; } // namespace transforms } // namespace api } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/include/vision.h b/mindspore/ccsrc/minddata/dataset/include/vision.h index c76ec585daa..46c9d6bdba7 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision.h @@ -43,6 +43,7 @@ class RandomAffineOperation; class RandomColorOperation; class RandomColorAdjustOperation; class RandomCropOperation; +class RandomCropDecodeResizeOperation; class RandomHorizontalFlipOperation; class RandomPosterizeOperation; class RandomRotationOperation; @@ -196,6 +197,23 @@ std::shared_ptr RandomCrop(std::vector size, std:: bool pad_if_needed = false, std::vector fill_value = {0, 0, 0}, BorderType padding_mode = BorderType::kConstant); +/// \brief Function to create a RandomCropDecodeResize TensorOperation. +/// \notes Equivalent to RandomResizedCrop, but crops before decoding. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \param[in] scale - range [min, max) of the crop size relative to the +/// original size (default=(0.08, 1.0)) +/// \param[in] ratio - range [min, max) of aspect ratio to be +/// cropped (default=(3. / 4., 4. 
/ 3.)) +/// \param[in] interpolation - an enum for the mode of interpolation +/// \param[in] max_attempts - the maximum number of attempts to propose a valid crop_area (default=10). +/// If exceeded, fall back to use center_crop instead. +/// \return Shared pointer to the current TensorOperation, or nullptr if the parameters are invalid. +std::shared_ptr RandomCropDecodeResize( + std::vector size, std::vector scale = {0.08, 1.0}, std::vector ratio = {3. / 4, 4. / 3}, + InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10); + /// \brief Function to create a RandomHorizontalFlip TensorOperation. /// \notes Tensor operation to perform random horizontal flip. /// \param[in] prob - float representing the probability of flip. @@ -480,6 +498,25 @@ class RandomCropOperation : public TensorOperation { BorderType padding_mode_; }; +class RandomCropDecodeResizeOperation : public TensorOperation { + public: + RandomCropDecodeResizeOperation(std::vector size, std::vector scale, std::vector ratio, + InterpolationMode interpolation, int32_t max_attempts); + + ~RandomCropDecodeResizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + std::vector scale_; + std::vector ratio_; + InterpolationMode interpolation_; + int32_t max_attempts_; +}; + class RandomHorizontalFlipOperation : public TensorOperation { public: explicit RandomHorizontalFlipOperation(float probability = 0.5); diff --git a/tests/ut/cpp/dataset/c_api_transforms_test.cc b/tests/ut/cpp/dataset/c_api_transforms_test.cc index aaa82835818..33278fbeb67 100644 --- a/tests/ut/cpp/dataset/c_api_transforms_test.cc +++ b/tests/ut/cpp/dataset/c_api_transforms_test.cc @@ -142,3 +142,61 @@ TEST_F(MindDataTestPipeline, TestOneHotSuccess2) { // Manually terminate the pipeline iter->Stop(); } + +TEST_F(MindDataTestPipeline, TestTypeCastSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTypeCastSuccess."; + + // Create a Cifar10 Dataset + std::string folder_path = 
datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", RandomSampler(false, 1)); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + // Check original data type of dataset (compare std::string contents, not char pointers) + auto image = row["image"]; + std::string ori_type = image->type().ToString(); + MS_LOG(INFO) << "Original data type: " << ori_type; + EXPECT_EQ(ori_type, "uint8"); + + // Manually terminate the pipeline + iter->Stop(); + + // Create objects for the tensor ops + std::shared_ptr type_cast = transforms::TypeCast("uint16"); + EXPECT_NE(type_cast, nullptr); + + // Create a Map operation on ds + std::shared_ptr ds2 = ds->Map({type_cast}, {"image"}); + EXPECT_NE(ds2, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter2 = ds2->CreateIterator(); + EXPECT_NE(iter2, nullptr); + + // Check current data type of dataset (compare std::string contents, not char pointers) + iter2->GetNextRow(&row); + auto image2 = row["image"]; + std::string cur_type = image2->type().ToString(); + MS_LOG(INFO) << "Current data type: " << cur_type; + EXPECT_EQ(cur_type, "uint16"); + + // Manually terminate the pipeline + iter2->Stop(); +} + +TEST_F(MindDataTestPipeline, TestTypeCastFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTypeCastFail with invalid params."; + + // incorrect data type + std::shared_ptr type_cast = transforms::TypeCast("char"); + EXPECT_EQ(type_cast, nullptr); +} diff --git a/tests/ut/cpp/dataset/c_api_vision_test.cc b/tests/ut/cpp/dataset/c_api_vision_test.cc index cdbe487cdcf..ba9a275ece5 100644 --- a/tests/ut/cpp/dataset/c_api_vision_test.cc +++ b/tests/ut/cpp/dataset/c_api_vision_test.cc @@ -1313,3 +1313,117 @@ TEST_F(MindDataTestPipeline, TestNormalizeFail) { normalize = mindspore::dataset::api::vision::Normalize({300.0, 115.0, 100.0}, {70.0, 68.0, 71.0}); EXPECT_EQ(normalize, nullptr); } + +TEST_F(MindDataTestPipeline, TestRandomCropDecodeResizeSucess1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomCropDecodeResize with default params."; + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, SequentialSampler(0, 2)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_crop_decode_resize = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 60}); + EXPECT_NE(random_crop_decode_resize, nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_crop_decode_resize}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + EXPECT_EQ(image->shape()[0], 50); + EXPECT_EQ(image->shape()[1], 60); + } + + EXPECT_EQ(i, 2); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomCropDecodeResizeSucess2) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomCropDecodeResize with single size."; + // Create an ImageFolder Dataset (this test expects it to yield 3 rows) + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, RandomSampler(false, 3)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops (single size value => square 100x100 output expected) + std::shared_ptr random_crop_decode_resize = + mindspore::dataset::api::vision::RandomCropDecodeResize({100}); + EXPECT_NE(random_crop_decode_resize, nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_crop_decode_resize}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + EXPECT_EQ(image->shape()[0], 100); + EXPECT_EQ(image->shape()[1], 100); + } + + EXPECT_EQ(i, 3); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomCropDecodeResizeFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomCropDecodeResize with invalid params."; + // size vector must hold 1 or 2 values; 3 values is invalid + std::shared_ptr random_crop_decode_resize_1 = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 100, 150}); + EXPECT_EQ(random_crop_decode_resize_1, nullptr); + + // incorrect scale vector: wrong element count, then min > max + std::shared_ptr random_crop_decode_resize_2 = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 50}, {0.5}); + EXPECT_EQ(random_crop_decode_resize_2, nullptr); + + std::shared_ptr random_crop_decode_resize_3 = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 50}, {0.5, 0.1}); + EXPECT_EQ(random_crop_decode_resize_3, nullptr); + + // incorrect ratio vector: wrong element count, then min > max + std::shared_ptr random_crop_decode_resize_4 = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 50}, {0.5, 0.6}, {0.9}); + EXPECT_EQ(random_crop_decode_resize_4, nullptr); + + std::shared_ptr random_crop_decode_resize_5 = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 50}, {0.5, 0.6}, {0.9, 0.1}); + EXPECT_EQ(random_crop_decode_resize_5, nullptr); + + // incorrect max_attempts range: must be >= 1 + std::shared_ptr random_crop_decode_resize_6 = + mindspore::dataset::api::vision::RandomCropDecodeResize({50, 50}, {0.5, 0.6}, {0.9, 0.9}, + mindspore::dataset::InterpolationMode::kLinear, 0); + EXPECT_EQ(random_crop_decode_resize_6, nullptr); +}