diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc index f38a5f0592e..8a8e88be57d 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -31,6 +31,7 @@ #include "dataset/kernels/image/random_crop_and_resize_op.h" #include "dataset/kernels/image/random_crop_op.h" #include "dataset/kernels/image/random_horizontal_flip_op.h" +#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" #include "dataset/kernels/image/random_resize_op.h" #include "dataset/kernels/image/random_rotation_op.h" #include "dataset/kernels/image/random_vertical_flip_op.h" @@ -38,6 +39,7 @@ #include "dataset/kernels/image/resize_bilinear_op.h" #include "dataset/kernels/image/resize_op.h" #include "dataset/kernels/image/uniform_aug_op.h" +#include "dataset/kernels/image/bounding_box_augment_op.h" #include "dataset/kernels/data/fill_op.h" #include "dataset/kernels/data/mask_op.h" #include "dataset/kernels/data/pad_end_op.h" @@ -347,6 +349,11 @@ void bindTensorOps1(py::module *m) { .def(py::init>, int32_t>(), py::arg("operations"), py::arg("NumOps") = UniformAugOp::kDefNumOps); + (void)py::class_>( + *m, "BoundingBoxAugOp", "Tensor operation to apply a transformation on a random choice of bounding boxes.") + .def(py::init, float>(), py::arg("transform"), + py::arg("ratio") = BoundingBoxAugOp::defRatio); + (void)py::class_>( *m, "ResizeBilinearOp", "Tensor operation to resize an image using " @@ -361,6 +368,11 @@ void bindTensorOps1(py::module *m) { (void)py::class_>( *m, "RandomHorizontalFlipOp", "Tensor operation to randomly flip an image horizontally.") .def(py::init(), py::arg("probability") = RandomHorizontalFlipOp::kDefProbability); + + (void)py::class_>( + *m, "RandomHorizontalFlipWithBBoxOp", + "Tensor operation to randomly flip an image horizontally, while flipping bounding boxes.") + .def(py::init(), py::arg("probability") = 
RandomHorizontalFlipWithBBoxOp::kDefProbability); } void bindTensorOps2(py::module *m) { diff --git a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt index 583a732f7d1..05705e64dd8 100644 --- a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt @@ -13,6 +13,8 @@ add_library(kernels-image OBJECT random_crop_and_resize_op.cc random_crop_op.cc random_horizontal_flip_op.cc + random_horizontal_flip_bbox_op.cc + bounding_box_augment_op.cc random_resize_op.cc random_rotation_op.cc random_vertical_flip_op.cc diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc new file mode 100644 index 00000000000..7150475c135 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "dataset/kernels/image/bounding_box_augment_op.h" +#include "dataset/kernels/image/resize_op.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/core/cv_tensor.h" + +namespace mindspore { +namespace dataset { +const float BoundingBoxAugOp::defRatio = 0.3; + +BoundingBoxAugOp::BoundingBoxAugOp(std::shared_ptr transform, float ratio) + : ratio_(ratio), transform_(std::move(transform)) {} + +Status BoundingBoxAugOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid + uint32_t num_of_boxes = input[1]->shape()[0]; + uint32_t num_to_aug = num_of_boxes * ratio_; // cast to int + std::vector boxes(num_of_boxes); + std::vector selected_boxes; + for (uint32_t i = 0; i < num_of_boxes; i++) boxes[i] = i; + // sample bboxes according to ratio picked by user + std::random_device rd; + std::sample(boxes.begin(), boxes.end(), std::back_inserter(selected_boxes), num_to_aug, std::mt19937(rd())); + std::shared_ptr crop_out; + std::shared_ptr res_out; + std::shared_ptr input_restore = CVTensor::AsCVTensor(input[0]); + + for (uint32_t i = 0; i < num_to_aug; i++) { + uint32_t min_x = 0; + uint32_t min_y = 0; + uint32_t b_w = 0; + uint32_t b_h = 0; + // get the required items + input[1]->GetItemAt(&min_x, {selected_boxes[i], 0}); + input[1]->GetItemAt(&min_y, {selected_boxes[i], 1}); + input[1]->GetItemAt(&b_w, {selected_boxes[i], 2}); + input[1]->GetItemAt(&b_h, {selected_boxes[i], 3}); + Crop(input_restore, &crop_out, min_x, min_y, b_w, b_h); + // transform the cropped bbox region + transform_->Compute(crop_out, &res_out); + // place the transformed region back in the restored input + std::shared_ptr res_img = CVTensor::AsCVTensor(res_out); + // check if transformed crop is out of bounds of the box + if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || + res_img->mat().rows < b_h) { + // if 
so, resize to fit in the box + std::shared_ptr resize_op = std::make_shared(b_h, b_w); + resize_op->Compute(std::static_pointer_cast(res_img), &res_out); + res_img = CVTensor::AsCVTensor(res_out); + } + res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); + } + (*output).push_back(std::move(std::static_pointer_cast(input_restore))); + (*output).push_back(input[1]); + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h new file mode 100644 index 00000000000..934df194aaf --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef DATASET_KERNELS_IMAGE_BOUNDING_BOX_AUGMENT_OP_H_ +#define DATASET_KERNELS_IMAGE_BOUNDING_BOX_AUGMENT_OP_H_ + +#include +#include +#include +#include +#include "dataset/core/tensor.h" +#include "dataset/kernels/tensor_op.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +class BoundingBoxAugOp : public TensorOp { + public: + // Default values, also used by python_bindings.cc + static const float defRatio; + + // Constructor for BoundingBoxAugmentOp + // @param std::shared_ptr transform transform: C++ opration to apply on select bounding boxes + // @param float ratio: ratio of bounding boxes to have the transform applied on + BoundingBoxAugOp(std::shared_ptr transform, float ratio); + + ~BoundingBoxAugOp() override = default; + + // Provide stream operator for displaying it + friend std::ostream &operator<<(std::ostream &out, const BoundingBoxAugOp &so) { + so.Print(out); + return out; + } + + void Print(std::ostream &out) const override { out << "BoundingBoxAugOp"; } + + Status Compute(const TensorRow &input, TensorRow *output) override; + + private: + float ratio_; + std::shared_ptr transform_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_BOUNDING_BOX_AUGMENT_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc new file mode 100644 index 00000000000..52a1cf7ee74 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.cc @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/util/status.h" +#include "dataset/core/cv_tensor.h" +#include "dataset/core/pybind_support.h" + +namespace mindspore { +namespace dataset { +const float RandomHorizontalFlipWithBBoxOp::kDefProbability = 0.5; + +Status RandomHorizontalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + if (distribution_(rnd_)) { + // To test bounding boxes algorithm, create random bboxes from image dims + size_t numOfBBoxes = input[1]->shape()[0]; // set to give number of bboxes + float imgCenter = (input[0]->shape()[1] / 2); // get the center of the image + + for (int i = 0; i < numOfBBoxes; i++) { + uint32_t b_w = 0; // bounding box width + uint32_t min_x = 0; + // get the required items + input[1]->GetItemAt(&min_x, {i, 0}); + input[1]->GetItemAt(&b_w, {i, 2}); + // do the flip + float diff = imgCenter - min_x; // get distance from min_x to center + uint32_t refl_min_x = diff + imgCenter; // get reflection of min_x + uint32_t new_min_x = refl_min_x - b_w; // subtract from the reflected min_x to get the new one + + input[1]->SetItemAt({i, 0}, new_min_x); + } + (*output).push_back(nullptr); + (*output).push_back(nullptr); + // move input to output pointer of bounding boxes + (*output)[1] = std::move(input[1]); + // perform HorizontalFlip on the image + std::shared_ptr input_cv = CVTensor::AsCVTensor(std::move(input[0])); + return 
HorizontalFlip(std::static_pointer_cast(input_cv), &(*output)[0]); + } + *output = input; + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h new file mode 100644 index 00000000000..06c96e11ae9 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_bbox_op.h @@ -0,0 +1,62 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
// Validates a row that must hold an image at index 0 and bounding boxes at index 1.
// On failure the macro makes the *enclosing function* return an error Status:
//   - kBoundingBoxInvalidShape: the row has fewer than two tensors, the box tensor has fewer than two
//     dimensions, or a box has fewer than 4 features (min_x, min_y, width, height);
//   - kBoundingBoxOutOfBounds: a box has a negative origin or extends past the image;
//   - any error Status returned while reading a box value.
// The extent test is written so that it cannot wrap around in unsigned arithmetic.
// (No comments inside the body: a // comment would swallow the line continuation.)
#define BOUNDING_BOX_CHECK(input)                                                                  \
  do {                                                                                             \
    if ((input).size() < 2) {                                                                      \
      return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__,                      \
                    "Requires Image and Bounding Boxes, likely missed bounding boxes.");           \
    }                                                                                              \
    if ((input)[1]->shape().Size() < 2) {                                                          \
      return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__,                      \
                    "Bounding boxes shape should have at least two dimensions.");                  \
    }                                                                                              \
    uint32_t num_of_features = (input)[1]->shape()[1];                                             \
    if (num_of_features < 4) {                                                                     \
      return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__,                      \
                    "Bounding boxes should have at least 4 features");                             \
    }                                                                                              \
    uint32_t num_of_boxes = (input)[1]->shape()[0];                                                \
    uint32_t img_h = (input)[0]->shape()[0];                                                       \
    uint32_t img_w = (input)[0]->shape()[1];                                                       \
    for (uint32_t i = 0; i < num_of_boxes; i++) {                                                  \
      uint32_t min_x = 0;                                                                          \
      uint32_t min_y = 0;                                                                          \
      uint32_t b_w = 0;                                                                            \
      uint32_t b_h = 0;                                                                            \
      RETURN_IF_NOT_OK((input)[1]->GetItemAt(&min_x, {i, 0}));                                     \
      RETURN_IF_NOT_OK((input)[1]->GetItemAt(&min_y, {i, 1}));                                     \
      RETURN_IF_NOT_OK((input)[1]->GetItemAt(&b_w, {i, 2}));                                       \
      RETURN_IF_NOT_OK((input)[1]->GetItemAt(&b_h, {i, 3}));                                       \
      if (static_cast<int32_t>(min_x) < 0 || static_cast<int32_t>(min_y) < 0) {                    \
        return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__,                     \
                      "At least one of the bounding boxes has negative min_x or min_y.");          \
      }                                                                                            \
      if (min_x > img_w || b_w > img_w - min_x || min_y > img_h || b_h > img_h - min_y) {          \
        return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__,                     \
                      "At least one of the bounding boxes is out of bounds of the image.");        \
      }                                                                                            \
    }                                                                                              \
  } while (false)
class RandomHorizontalFlipWithBBox(cde.RandomHorizontalFlipWithBBoxOp):
    """
    Randomly mirror the input image left-to-right with the given probability.
    The bounding boxes of an object detection pipeline are flipped together with the image,
    so that they keep describing the same objects.

    Args:
        prob (float): Probability of the image being flipped (default=0.5).
    """

    @check_prob
    def __init__(self, prob=0.5):
        # keep the chosen probability on the Python object, then initialise the C++ op with it
        self.prob = prob
        super(RandomHorizontalFlipWithBBox, self).__init__(prob)
def check_bounding_box_augment_cpp(method):
    """
    Wrapper method to check the parameters of BoundingBoxAugment cpp op.

    The wrapped method is always called with keyword arguments ``transform`` and ``ratio``.

    Raises:
        ValueError: If ratio is not an int or float, or if transform is not a TensorOp.
        Range violations of ratio are reported by ``check_value``.
    """

    @wraps(method)
    def new_method(self, *args, **kwargs):
        # positional arguments first, keyword arguments override them
        transform, ratio = (list(args) + 2 * [None])[:2]
        if "transform" in kwargs:
            transform = kwargs.get("transform")
        if "ratio" in kwargs:
            ratio = kwargs.get("ratio")

        if ratio is None:
            ratio = 0.3  # same default as the op's constructor

        # Validate the type before the range: check_value is given a numeric interval, so a non-numeric
        # ratio must be rejected here with the intended error rather than reaching the range check.
        if not isinstance(ratio, (float, int)):
            raise ValueError("Ratio should be an int or float.")
        check_value(ratio, [0., 1.])

        if not isinstance(transform, TensorOp):
            raise ValueError("Transform can only be a C++ operation.")

        kwargs["transform"] = transform
        kwargs["ratio"] = ratio
        return method(self, **kwargs)

    return new_method
a/tests/ut/data/dataset/testVOC2012_2/Annotations/123.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/123.xml new file mode 100644 index 00000000000..8c985d3c699 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/123.xml @@ -0,0 +1,27 @@ + + VOC2012 + 123.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 375 + 3 + + 1 + + car + Unspecified + 1 + 0 + + 42 + 6 + 610 + 600 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/129.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/129.xml new file mode 100644 index 00000000000..62777d94572 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/129.xml @@ -0,0 +1,27 @@ + + VOC2012 + 129.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 375 + 3 + + 1 + + dog + Frontal + 0 + 0 + + 1328 + 431 + 2662 + 1695 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/15.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/15.xml new file mode 100644 index 00000000000..1596f5e4fef --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/15.xml @@ -0,0 +1,27 @@ + + VOC2012 + 32.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 281 + 3 + + 1 + + train + Frontal + 0 + 0 + + 1168 + 405 + 3270 + 2022 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/32.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/32.xml new file mode 100644 index 00000000000..f65be6b2edb --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/32.xml @@ -0,0 +1,27 @@ + + VOC2012 + 32.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 281 + 3 + + 1 + + train + Frontal + 0 + 0 + + 1168 + 405 + 3270 + 2022 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/33.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/33.xml new file mode 100644 index 00000000000..93ca455577a --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/33.xml @@ -0,0 
+1,27 @@ + + VOC2012 + 33.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 366 + 3 + + 1 + + person + Unspecified + 0 + 0 + + 1168 + 395 + 2859 + 2084 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/39.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/39.xml new file mode 100644 index 00000000000..039a34bd6c0 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/39.xml @@ -0,0 +1,27 @@ + + VOC2012 + 39.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 375 + 3 + + 1 + + dog + Unspecified + 0 + 0 + + 684 + 311 + 3112 + 1820 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/42.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/42.xml new file mode 100644 index 00000000000..91471630e35 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/42.xml @@ -0,0 +1,27 @@ + + VOC2012 + 42.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 335 + 3 + + 1 + + person + Unspecified + 1 + 0 + + 874 + 152 + 2827 + 2000 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/61.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/61.xml new file mode 100644 index 00000000000..989e3c07d18 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/61.xml @@ -0,0 +1,39 @@ + + VOC2012 + 61.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 333 + 3 + + 1 + + train + Unspecified + 0 + 0 + + 25 + 40 + 641 + 613 + + + + person + Frontal + 0 + 0 + + 204 + 198 + 271 + 293 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/63.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/63.xml new file mode 100644 index 00000000000..51b72c3d05e --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/63.xml @@ -0,0 +1,39 @@ + + VOC2012 + 63.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 375 + 3 + + 1 + + cat + Unspecified + 0 + 0 + + 23 + 17 + 565 + 591 + + + + chair + 
Frontal + 1 + 0 + + 36 + 11 + 439 + 499 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/68.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/68.xml new file mode 100644 index 00000000000..df0b6781ed1 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/68.xml @@ -0,0 +1,27 @@ + + VOC2012 + 68.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 375 + 3 + + 1 + + cat + Unspecified + 1 + 0 + + 35 + 11 + 564 + 545 + + + diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/invalidxml.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/invalidxml.xml new file mode 100644 index 00000000000..8f6015b9dad --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/invalidxml.xml @@ -0,0 +1 @@ +invalidxml \ No newline at end of file diff --git a/tests/ut/data/dataset/testVOC2012_2/Annotations/xmlnoobject.xml b/tests/ut/data/dataset/testVOC2012_2/Annotations/xmlnoobject.xml new file mode 100644 index 00000000000..e0781e84f06 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/Annotations/xmlnoobject.xml @@ -0,0 +1,15 @@ + + VOC2012 + 33.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 366 + 3 + + 1 + \ No newline at end of file diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/invalidxml.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/invalidxml.txt new file mode 100644 index 00000000000..d12b49a0efc --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/invalidxml.txt @@ -0,0 +1 @@ +invalidxml diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/train.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/train.txt new file mode 100644 index 00000000000..54a7daf2417 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/train.txt @@ -0,0 +1,11 @@ +15 +32 +33 +39 +42 +61 +63 +68 +121 +123 +129 diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/trainval.txt 
b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/trainval.txt new file mode 100644 index 00000000000..60d3b2f4a4c --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/trainval.txt @@ -0,0 +1 @@ +15 diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/val.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/val.txt new file mode 100644 index 00000000000..60d3b2f4a4c --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/val.txt @@ -0,0 +1 @@ +15 diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/xmlnoobject.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/xmlnoobject.txt new file mode 100644 index 00000000000..bf42aaf75d8 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/xmlnoobject.txt @@ -0,0 +1 @@ +xmlnoobject diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/xmlnotexist.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/xmlnotexist.txt new file mode 100644 index 00000000000..4beb327e223 --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Main/xmlnotexist.txt @@ -0,0 +1 @@ +4176 diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/train.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/train.txt new file mode 100644 index 00000000000..8a03056ffcc --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/train.txt @@ -0,0 +1,10 @@ +32 +33 +39 +42 +61 +63 +68 +121 +123 +129 diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/trainval.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/trainval.txt new file mode 100644 index 00000000000..3f10ffe7a4c --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/trainval.txt @@ -0,0 +1 @@ +15 \ No newline at end of file diff --git a/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/val.txt b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/val.txt new file mode 100644 
index 00000000000..3f10ffe7a4c --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012_2/ImageSets/Segmentation/val.txt @@ -0,0 +1 @@ +15 \ No newline at end of file diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/121.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/121.jpg new file mode 100644 index 00000000000..53ce82f6428 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/121.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/123.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/123.jpg new file mode 100644 index 00000000000..4f44baacb56 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/123.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/129.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/129.jpg new file mode 100644 index 00000000000..023bc503160 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/129.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/15.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/15.jpg new file mode 100644 index 00000000000..d6575891cba Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/15.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/32.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/32.jpg new file mode 100644 index 00000000000..d6575891cba Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/32.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/33.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/33.jpg new file mode 100644 index 00000000000..1ce2f2801b0 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/33.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/39.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/39.jpg new file mode 100644 index 00000000000..f723a44c291 Binary files /dev/null and 
b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/39.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/42.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/42.jpg new file mode 100644 index 00000000000..d622ab7d907 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/42.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/61.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/61.jpg new file mode 100644 index 00000000000..add5186cfea Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/61.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/63.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/63.jpg new file mode 100644 index 00000000000..5ca2194e88d Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/63.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/JPEGImages/68.jpg b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/68.jpg new file mode 100644 index 00000000000..eefa9123544 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/JPEGImages/68.jpg differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/121.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/121.png new file mode 100644 index 00000000000..c8c46504e5e Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/121.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/123.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/123.png new file mode 100644 index 00000000000..c7e1792fb11 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/123.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/129.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/129.png new file mode 100644 index 00000000000..131d6813622 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/129.png differ diff 
--git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/15.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/15.png new file mode 100644 index 00000000000..7d8c2ace0e7 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/15.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/32.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/32.png new file mode 100644 index 00000000000..b3efd92cd9d Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/32.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/33.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/33.png new file mode 100644 index 00000000000..b53109f8fe4 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/33.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/39.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/39.png new file mode 100644 index 00000000000..a3f51afe1ef Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/39.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/42.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/42.png new file mode 100644 index 00000000000..8326250b26f Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/42.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/61.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/61.png new file mode 100644 index 00000000000..913ef0c282b Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/61.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/63.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/63.png new file mode 100644 index 00000000000..6b4e216ce6f Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/63.png differ 
diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/68.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/68.png new file mode 100644 index 00000000000..8a4b0a6dfce Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationClass/68.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/121.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/121.png new file mode 100644 index 00000000000..c8c46504e5e Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/121.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/123.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/123.png new file mode 100644 index 00000000000..c7e1792fb11 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/123.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/129.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/129.png new file mode 100644 index 00000000000..131d6813622 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/129.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/15.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/15.png new file mode 100644 index 00000000000..7d8c2ace0e7 Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/15.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/32.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/32.png new file mode 100644 index 00000000000..b3efd92cd9d Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/32.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/33.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/33.png new file mode 100644 index 00000000000..b53109f8fe4 Binary files /dev/null and 
b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/33.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/39.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/39.png new file mode 100644 index 00000000000..a3f51afe1ef Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/39.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/42.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/42.png new file mode 100644 index 00000000000..8326250b26f Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/42.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/61.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/61.png new file mode 100644 index 00000000000..913ef0c282b Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/61.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/63.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/63.png new file mode 100644 index 00000000000..6b4e216ce6f Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/63.png differ diff --git a/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/68.png b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/68.png new file mode 100644 index 00000000000..8a4b0a6dfce Binary files /dev/null and b/tests/ut/data/dataset/testVOC2012_2/SegmentationObject/68.png differ diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py new file mode 100644 index 00000000000..f926fa9cb43 --- /dev/null +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -0,0 +1,317 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing the bounding box augment op in DE +""" +from enum import Enum +from mindspore import log as logger +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as c_vision +import matplotlib.pyplot as plt +import matplotlib.patches as patches +import numpy as np + +GENERATE_GOLDEN = False + +DATA_DIR = "../data/dataset/testVOC2012_2" + + +class BoxType(Enum): + """ + Defines box types for test cases + """ + WidthOverflow = 1 + HeightOverflow = 2 + NegativeXY = 3 + OnEdge = 4 + WrongShape = 5 + + +class AddBadAnnotation: # pylint: disable=too-few-public-methods + """ + Used to add erroneous bounding boxes to object detection pipelines. + Usage: + >>> # Adds a box that covers the whole image. Good for testing edge cases + >>> de = de.map(input_columns=["image", "annotation"], + >>> output_columns=["image", "annotation"], + >>> operations=AddBadAnnotation(BoxType.OnEdge)) + """ + + def __init__(self, box_type): + self.box_type = box_type + + def __call__(self, img, bboxes): + """ + Used to generate erroneous bounding box examples on given img. + :param img: image where the bounding boxes are. 
+ :param bboxes: in [x_min, y_min, w, h, label, truncate, difficult] format + :return: bboxes with bad examples added + """ + height = img.shape[0] + width = img.shape[1] + if self.box_type == BoxType.WidthOverflow: + # use box that overflows on width + return img, np.array([[0, 0, width + 1, height, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.HeightOverflow: + # use box that overflows on height + return img, np.array([[0, 0, width, height + 1, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.NegativeXY: + # use box with negative xy + return img, np.array([[-10, -10, width, height, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.OnEdge: + # use box that covers the whole image + return img, np.array([[0, 0, width, height, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.WrongShape: + # use box that covers the whole image + return img, np.array([[0, 0, width - 1]]).astype(np.uint32) + return img, bboxes + + +def h_flip(image): + """ + Apply the random_horizontal + """ + + # with the seed provided in this test case, it will always flip. 
+ # that's why we flip here too + image = image[:, ::-1, :] + return image + + +def check_bad_box(data, box_type, expected_error): + """ + :param data: de object detection pipeline + :param box_type: type of bad box + :param expected_error: error expected to get due to bad box + :return: None + """ + try: + test_op = c_vision.BoundingBoxAug(c_vision.RandomHorizontalFlip(1), + 1) # DEFINE TEST OP HERE -- (PROB 1 IN CASE OF RANDOM) + data = data.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to use width overflow + data = data.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=AddBadAnnotation(box_type)) # Add column for "annotation" + # map to apply ops + data = data.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + for _, _ in enumerate(data.create_dict_iterator()): + break + except RuntimeError as error: + logger.info("Got an exception in DE: {}".format(str(error))) + assert expected_error in str(error) + + +def fix_annotate(bboxes): + """ + Fix annotations to format followed by mindspore. 
+ :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format + :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format + """ + for bbox in bboxes: + tmp = bbox[0] + bbox[0] = bbox[1] + bbox[1] = bbox[2] + bbox[2] = bbox[3] + bbox[3] = bbox[4] + bbox[4] = tmp + return bboxes + + +def add_bounding_boxes(axis, bboxes): + """ + :param axis: axis to modify + :param bboxes: bounding boxes to draw on the axis + :return: None + """ + for bbox in bboxes: + rect = patches.Rectangle((bbox[0], bbox[1]), + bbox[2], bbox[3], + linewidth=1, edgecolor='r', facecolor='none') + # Add the patch to the Axes + axis.add_patch(rect) + + +def visualize(unaugmented_data, augment_data): + """ + :param unaugmented_data: original data + :param augment_data: data after augmentations + :return: None + """ + for idx, (un_aug_item, aug_item) in \ + enumerate(zip(unaugmented_data.create_dict_iterator(), + augment_data.create_dict_iterator())): + axis = plt.subplot(141) + plt.imshow(un_aug_item["image"]) + add_bounding_boxes(axis, un_aug_item["annotation"]) # add Orig BBoxes + plt.title("Original" + str(idx + 1)) + logger.info("Original ", str(idx + 1), " :", un_aug_item["annotation"]) + + axis = plt.subplot(142) + plt.imshow(aug_item["image"]) + add_bounding_boxes(axis, aug_item["annotation"]) # add AugBBoxes + plt.title("Augmented" + str(idx + 1)) + logger.info("Augmented ", str(idx + 1), " ", aug_item["annotation"], "\n") + plt.show() + + +def test_bounding_box_augment_with_rotation_op(plot=False): + """ + Test BoundingBoxAugment op + Prints images side by side with and without Aug applied + bboxes to compare and test + """ + logger.info("test_bounding_box_augment_with_rotation_op") + + data_voc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + + test_op = c_vision.BoundingBoxAug(c_vision.RandomRotation(90), 1) + # DEFINE 
TEST OP HERE -- (PROB 1 IN CASE OF RANDOM) + + # maps to fix annotations to minddata standard + data_voc1 = data_voc1.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + if plot: + visualize(data_voc1, data_voc2) + + +def test_bounding_box_augment_with_crop_op(plot=False): + """ + Test BoundingBoxAugment op + Prints images side by side with and without Aug applied + bboxes to compare and test + """ + logger.info("test_bounding_box_augment_with_crop_op") + + data_voc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + + test_op = c_vision.BoundingBoxAug(c_vision.RandomCrop(90), 1) + + # maps to fix annotations to minddata standard + data_voc1 = data_voc1.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + if plot: + visualize(data_voc1, data_voc2) + + +def test_bounding_box_augment_valid_ratio_c(plot=False): + """ + Test RandomHorizontalFlipWithBBox op + Prints images side by side with and without Aug applied + bboxes to compare and test + """ + logger.info("test_bounding_box_augment_valid_ratio_c") + + data_voc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", 
decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + + test_op = c_vision.BoundingBoxAug(c_vision.RandomHorizontalFlip(1), 0.9) + # DEFINE TEST OP HERE -- (PROB 1 IN CASE OF RANDOM) + + # maps to fix annotations to minddata standard + data_voc1 = data_voc1.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + if plot: + visualize(data_voc1, data_voc2) + + +def test_bounding_box_augment_invalid_ratio_c(): + """ + Test RandomHorizontalFlipWithBBox op with invalid input probability + """ + logger.info("test_bounding_box_augment_invalid_ratio_c") + + data_voc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + + try: + # ratio range is from 0 - 1 + test_op = c_vision.BoundingBoxAug(c_vision.RandomHorizontalFlip(1), 1.5) + # maps to fix annotations to minddata standard + data_voc1 = data_voc1.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + except ValueError as error: + logger.info("Got an exception in DE: {}".format(str(error))) + assert "Input is not" in str(error) + + +def 
test_bounding_box_augment_invalid_bounds_c(): + """ + Test BoundingBoxAugment op with invalid bboxes. + """ + logger.info("test_bounding_box_augment_invalid_bounds_c") + + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.WidthOverflow, "bounding boxes is out of bounds of the image") + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.HeightOverflow, "bounding boxes is out of bounds of the image") + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.NegativeXY, "min_x") + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.WrongShape, "4 features") + + +if __name__ == "__main__": + # set to false to not show plots + test_bounding_box_augment_with_rotation_op(False) + test_bounding_box_augment_with_crop_op(False) + test_bounding_box_augment_valid_ratio_c(False) + test_bounding_box_augment_invalid_ratio_c() + test_bounding_box_augment_invalid_bounds_c() diff --git a/tests/ut/python/dataset/test_random_horizontal_flip_bbox.py b/tests/ut/python/dataset/test_random_horizontal_flip_bbox.py new file mode 100644 index 00000000000..4ee7ccdda68 --- /dev/null +++ b/tests/ut/python/dataset/test_random_horizontal_flip_bbox.py @@ -0,0 +1,281 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing the random horizontal flip with bounding boxes op in DE +""" +from enum import Enum +from mindspore import log as logger +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as c_vision +import matplotlib.pyplot as plt +import matplotlib.patches as patches +import numpy as np + +GENERATE_GOLDEN = False + +DATA_DIR = "../data/dataset/testVOC2012_2" + + +class BoxType(Enum): + """ + Defines box types for test cases + """ + WidthOverflow = 1 + HeightOverflow = 2 + NegativeXY = 3 + OnEdge = 4 + WrongShape = 5 + + +class AddBadAnnotation: # pylint: disable=too-few-public-methods + """ + Used to add erroneous bounding boxes to object detection pipelines. + Usage: + >>> # Adds a box that covers the whole image. Good for testing edge cases + >>> de = de.map(input_columns=["image", "annotation"], + >>> output_columns=["image", "annotation"], + >>> operations=AddBadAnnotation(BoxType.OnEdge)) + """ + + def __init__(self, box_type): + self.box_type = box_type + + def __call__(self, img, bboxes): + """ + Used to generate erroneous bounding box examples on given img. + :param img: image where the bounding boxes are. 
+ :param bboxes: in [x_min, y_min, w, h, label, truncate, difficult] format + :return: bboxes with bad examples added + """ + height = img.shape[0] + width = img.shape[1] + if self.box_type == BoxType.WidthOverflow: + # use box that overflows on width + return img, np.array([[0, 0, width + 1, height, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.HeightOverflow: + # use box that overflows on height + return img, np.array([[0, 0, width, height + 1, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.NegativeXY: + # use box with negative xy + return img, np.array([[-10, -10, width, height, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.OnEdge: + # use box that covers the whole image + return img, np.array([[0, 0, width, height, 0, 0, 0]]).astype(np.uint32) + + if self.box_type == BoxType.WrongShape: + # use box that covers the whole image + return img, np.array([[0, 0, width - 1]]).astype(np.uint32) + return img, bboxes + + +def h_flip(image): + """ + Apply the random_horizontal + """ + + # with the seed provided in this test case, it will always flip. 
+ # that's why we flip here too + image = image[:, ::-1, :] + return image + + +def check_bad_box(data, box_type, expected_error): + """ + :param data: de object detection pipeline + :param box_type: type of bad box + :param expected_error: error expected to get due to bad box + :return: None + """ + # DEFINE TEST OP HERE -- (PROB 1 IN CASE OF RANDOM) + try: + test_op = c_vision.RandomHorizontalFlipWithBBox(1) + data = data.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to use width overflow + data = data.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=AddBadAnnotation(box_type)) # Add column for "annotation" + # map to apply ops + data = data.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + for _, _ in enumerate(data.create_dict_iterator()): + break + except RuntimeError as error: + logger.info("Got an exception in DE: {}".format(str(error))) + assert expected_error in str(error) + + +def fix_annotate(bboxes): + """ + Fix annotations to format followed by mindspore. 
+ :param bboxes: in [label, x_min, y_min, w, h, truncate, difficult] format + :return: annotation in [x_min, y_min, w, h, label, truncate, difficult] format + """ + for bbox in bboxes: + tmp = bbox[0] + bbox[0] = bbox[1] + bbox[1] = bbox[2] + bbox[2] = bbox[3] + bbox[3] = bbox[4] + bbox[4] = tmp + return bboxes + + +def add_bounding_boxes(axis, bboxes): + """ + :param axis: axis to modify + :param bboxes: bounding boxes to draw on the axis + :return: None + """ + for bbox in bboxes: + rect = patches.Rectangle((bbox[0], bbox[1]), + bbox[2], bbox[3], + linewidth=1, edgecolor='r', facecolor='none') + # Add the patch to the Axes + axis.add_patch(rect) + + +def visualize(unaugmented_data, augment_data): + """ + :param unaugmented_data: original data + :param augment_data: data after augmentations + :return: None + """ + for idx, (un_aug_item, aug_item) in \ + enumerate(zip(unaugmented_data.create_dict_iterator(), + augment_data.create_dict_iterator())): + axis = plt.subplot(141) + plt.imshow(un_aug_item["image"]) + add_bounding_boxes(axis, un_aug_item["annotation"]) # add Orig BBoxes + plt.title("Original" + str(idx + 1)) + logger.info("Original ", str(idx + 1), " :", un_aug_item["annotation"]) + + axis = plt.subplot(142) + plt.imshow(aug_item["image"]) + add_bounding_boxes(axis, aug_item["annotation"]) # add AugBBoxes + plt.title("Augmented" + str(idx + 1)) + logger.info("Augmented ", str(idx + 1), " ", aug_item["annotation"], "\n") + plt.show() + + +def test_random_horizontal_bbox_op(plot=False): + """ + Test RandomHorizontalFlipWithBBox op + Prints images side by side with and without Aug applied + bboxes to compare and test + """ + logger.info("test_random_horizontal_bbox_c") + + data_voc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + + # DEFINE TEST OP HERE -- (PROB 1 IN CASE OF RANDOM) + test_op = 
c_vision.RandomHorizontalFlipWithBBox(1) + + # maps to fix annotations to minddata standard + data_voc1 = data_voc1.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + if plot: + visualize(data_voc1, data_voc2) + + +def test_random_horizontal_bbox_valid_prob_c(plot=False): + """ + Test RandomHorizontalFlipWithBBox op + Prints images side by side with and without Aug applied + bboxes to compare and test + """ + logger.info("test_random_horizontal_bbox_valid_prob_c") + + data_voc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + # DEFINE TEST OP HERE -- (PROB 1 IN CASE OF RANDOM) + test_op = c_vision.RandomHorizontalFlipWithBBox(0.3) + + # maps to fix annotations to minddata standard + data_voc1 = data_voc1.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + if plot: + visualize(data_voc1, data_voc2) + + +def test_random_horizontal_bbox_invalid_prob_c(): + """ + Test RandomHorizontalFlipWithBBox op with invalid input probability + """ + logger.info("test_random_horizontal_bbox_invalid_prob_c") + + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", 
decode=True, shuffle=False) + + try: + # Note: Valid range of prob should be [0.0, 1.0] + test_op = c_vision.RandomHorizontalFlipWithBBox(1.5) + data_voc2 = data_voc2.map(input_columns=["annotation"], + output_columns=["annotation"], + operations=fix_annotate) + # map to apply ops + data_voc2 = data_voc2.map(input_columns=["image", "annotation"], + output_columns=["image", "annotation"], + columns_order=["image", "annotation"], + operations=[test_op]) # Add column for "annotation" + except ValueError as error: + logger.info("Got an exception in DE: {}".format(str(error))) + assert "Input is not" in str(error) + + +def test_random_horizontal_bbox_invalid_bounds_c(): + """ + Test RandomHorizontalFlipWithBBox op with invalid bounding boxes + """ + logger.info("test_random_horizontal_bbox_invalid_bounds_c") + + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.WidthOverflow, "bounding boxes is out of bounds of the image") + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.HeightOverflow, "bounding boxes is out of bounds of the image") + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.NegativeXY, "min_x") + data_voc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) + check_bad_box(data_voc2, BoxType.WrongShape, "4 features") + +if __name__ == "__main__": + # set to false to not show plots + test_random_horizontal_bbox_op(False) + test_random_horizontal_bbox_valid_prob_c(False) + test_random_horizontal_bbox_invalid_prob_c() + test_random_horizontal_bbox_invalid_bounds_c()