Fixing ratio bug with BoundingBoxAugment

2020-07-13 11:39:09 -04:00 · 2020-07-13 11:39:09 -04:00 · 56da3b0ae1
parent ece99192e8
commit 56da3b0ae1
5 changed files with 32 additions and 34 deletions
--- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc
+++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc
@@ -26,7 +26,7 @@ namespace dataset {
 const float BoundingBoxAugmentOp::kDefRatio = 0.3;

 BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr<TensorOp> transform, float ratio)
-    : ratio_(ratio), transform_(std::move(transform)) {
+    : ratio_(ratio), uniform_(0, 1), transform_(std::move(transform)) {
  rnd_.seed(GetSeed());
 }

@@ -34,41 +34,38 @@ Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output)
  IO_CHECK_VECTOR(input, output);
  BOUNDING_BOX_CHECK(input);  // check if bounding boxes are valid
  uint32_t num_of_boxes = input[1]->shape()[0];
-  uint32_t num_to_aug = num_of_boxes * ratio_;  // cast to int
-  std::vector<uint32_t> boxes(num_of_boxes);
-  std::vector<uint32_t> selected_boxes;
-  for (uint32_t i = 0; i < num_of_boxes; i++) boxes[i] = i;
-  // sample bboxes according to ratio picked by user
-  std::sample(boxes.begin(), boxes.end(), std::back_inserter(selected_boxes), num_to_aug, rnd_);
  std::shared_ptr<Tensor> crop_out;
  std::shared_ptr<Tensor> res_out;
  std::shared_ptr<CVTensor> input_restore = CVTensor::AsCVTensor(input[0]);
-  for (uint32_t i = 0; i < num_to_aug; i++) {
-    float min_x = 0;
-    float min_y = 0;
-    float b_w = 0;
-    float b_h = 0;
-    // get the required items
-    RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_x, {selected_boxes[i], 0}));
-    RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_y, {selected_boxes[i], 1}));
-    RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_w, {selected_boxes[i], 2}));
-    RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_h, {selected_boxes[i], 3}));
-    RETURN_IF_NOT_OK(Crop(input_restore, &crop_out, static_cast<int>(min_x), static_cast<int>(min_y),
-                          static_cast<int>(b_w), static_cast<int>(b_h)));
-    // transform the cropped bbox region
-    RETURN_IF_NOT_OK(transform_->Compute(crop_out, &res_out));
-    // place the transformed region back in the restored input
-    std::shared_ptr<CVTensor> res_img = CVTensor::AsCVTensor(res_out);
-    // check if transformed crop is out of bounds of the box
-    if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w ||
-        res_img->mat().rows < b_h) {
-      // if so, resize to fit in the box
-      std::shared_ptr<TensorOp> resize_op =
-        std::make_shared<ResizeOp>(static_cast<int32_t>(b_h), static_cast<int32_t>(b_w));
-      RETURN_IF_NOT_OK(resize_op->Compute(std::static_pointer_cast<Tensor>(res_img), &res_out));
-      res_img = CVTensor::AsCVTensor(res_out);
+  for (uint32_t i = 0; i < num_of_boxes; i++) {
+    // using a uniform distribution to ensure op happens with probability ratio_
+    if (uniform_(rnd_) < ratio_) {
+      float min_x = 0;
+      float min_y = 0;
+      float b_w = 0;
+      float b_h = 0;
+      // get the required items
+      RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_x, {i, 0}));
+      RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_y, {i, 1}));
+      RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_w, {i, 2}));
+      RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_h, {i, 3}));
+      RETURN_IF_NOT_OK(Crop(input_restore, &crop_out, static_cast<int>(min_x), static_cast<int>(min_y),
+                            static_cast<int>(b_w), static_cast<int>(b_h)));
+      // transform the cropped bbox region
+      RETURN_IF_NOT_OK(transform_->Compute(crop_out, &res_out));
+      // place the transformed region back in the restored input
+      std::shared_ptr<CVTensor> res_img = CVTensor::AsCVTensor(res_out);
+      // check if transformed crop is out of bounds of the box
+      if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w ||
+          res_img->mat().rows < b_h) {
+        // if so, resize to fit in the box
+        std::shared_ptr<TensorOp> resize_op =
+          std::make_shared<ResizeOp>(static_cast<int32_t>(b_h), static_cast<int32_t>(b_w));
+        RETURN_IF_NOT_OK(resize_op->Compute(std::static_pointer_cast<Tensor>(res_img), &res_out));
+        res_img = CVTensor::AsCVTensor(res_out);
+      }
+      res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows)));
    }
-    res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows)));
  }
  (*output).push_back(std::move(std::static_pointer_cast<Tensor>(input_restore)));
  (*output).push_back(input[1]);
--- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h
+++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h
@@ -53,6 +53,7 @@ class BoundingBoxAugmentOp : public TensorOp {
 private:
  float ratio_;
  std::mt19937 rnd_;
+  std::uniform_real_distribution<float> uniform_;
  std::shared_ptr<TensorOp> transform_;
 };
 }  // namespace dataset
--- a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz
+++ b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz
--- a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz
+++ b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz
--- a/tests/ut/python/dataset/test_bounding_box_augment.py
+++ b/tests/ut/python/dataset/test_bounding_box_augment.py
@@ -84,8 +84,8 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)

-    # Ratio is set to 1 to apply rotation on all bounding boxes.
-    test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.5)
+    # Ratio is set to 0.9 to apply RandomCrop of size (50, 50) on 90% of the bounding boxes.
+    test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)

    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],